From 3d4c2c4323b16c48d342cacde3fcf0c3d694b9fa Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 12 Jul 2024 16:34:26 -0700 Subject: [PATCH 1/3] Add pattern serde derive test --- utils/pattern/src/frontend/serde.rs | 43 +++++++++++++++++------------ 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/utils/pattern/src/frontend/serde.rs b/utils/pattern/src/frontend/serde.rs index 19f38010ebe..724ade1a096 100644 --- a/utils/pattern/src/frontend/serde.rs +++ b/utils/pattern/src/frontend/serde.rs @@ -78,20 +78,29 @@ mod tests { use super::*; use crate::SinglePlaceholderPattern; + #[derive(Debug, PartialEq, Deserialize, Serialize)] + struct TestDerive<'a> { + #[serde(borrow)] + pub pattern_cow: SinglePlaceholderPattern>, + } + #[test] fn test_json() { let pattern_owned = SinglePlaceholderPattern::from_str("Hello, {0}!").unwrap(); let pattern_cow: SinglePlaceholderPattern> = SinglePlaceholderPattern::from_store_unchecked(Cow::Owned(pattern_owned.take_store())); - let pattern_json = serde_json::to_string(&pattern_cow).unwrap(); + let pattern_obj = TestDerive { pattern_cow }; + let pattern_json = serde_json::to_string(&pattern_obj).unwrap(); assert_eq!( pattern_json, - r#"[{"Literal":"Hello, "},{"Placeholder":"Singleton"},{"Literal":"!"}]"# + r#"{"pattern_cow":[{"Literal":"Hello, "},{"Placeholder":"Singleton"},{"Literal":"!"}]}"# ); - let pattern_deserialized: SinglePlaceholderPattern> = - serde_json::from_str(&pattern_json).unwrap(); - assert_eq!(pattern_cow, pattern_deserialized); - assert!(matches!(pattern_deserialized.take_store(), Cow::Owned(_))); + let pattern_deserialized: TestDerive = serde_json::from_str(&pattern_json).unwrap(); + assert_eq!(pattern_obj, pattern_deserialized); + assert!(matches!( + pattern_deserialized.pattern_cow.take_store(), + Cow::Owned(_) + )); } #[test] @@ -99,13 +108,13 @@ mod tests { let pattern_owned = SinglePlaceholderPattern::from_str("Hello, {0}!").unwrap(); let pattern_cow: SinglePlaceholderPattern> = SinglePlaceholderPattern::from_store_unchecked(Cow::Owned(pattern_owned.take_store())); - let pattern_postcard = postcard::to_stdvec(&pattern_cow).unwrap(); + let pattern_obj = TestDerive { pattern_cow }; + let pattern_postcard = postcard::to_stdvec(&pattern_obj).unwrap(); assert_eq!(pattern_postcard, b"\x09\x08Hello, !"); - let pattern_deserialized: SinglePlaceholderPattern> = - postcard::from_bytes(&pattern_postcard).unwrap(); - assert_eq!(pattern_cow, pattern_deserialized); + let pattern_deserialized: TestDerive = postcard::from_bytes(&pattern_postcard).unwrap(); + assert_eq!(pattern_obj, pattern_deserialized); assert!(matches!( - pattern_deserialized.take_store(), + pattern_deserialized.pattern_cow.take_store(), Cow::Borrowed(_) )); } @@ -115,13 +124,13 @@ mod tests { let pattern_owned = SinglePlaceholderPattern::from_str("Hello, {0}!").unwrap(); let pattern_cow: SinglePlaceholderPattern> = SinglePlaceholderPattern::from_store_unchecked(Cow::Owned(pattern_owned.take_store())); - let pattern_rmp = rmp_serde::to_vec(&pattern_cow).unwrap(); - assert_eq!(pattern_rmp, b"\xA9\x08Hello, !"); - let pattern_deserialized: SinglePlaceholderPattern> = - rmp_serde::from_slice(&pattern_rmp).unwrap(); - assert_eq!(pattern_cow, pattern_deserialized); + let pattern_obj = TestDerive { pattern_cow }; + let pattern_rmp = rmp_serde::to_vec(&pattern_obj).unwrap(); + assert_eq!(pattern_rmp, b"\x91\xA9\x08Hello, !"); + let pattern_deserialized: TestDerive = rmp_serde::from_slice(&pattern_rmp).unwrap(); + assert_eq!(pattern_obj, pattern_deserialized); assert!(matches!( - pattern_deserialized.take_store(), + pattern_deserialized.pattern_cow.take_store(), Cow::Borrowed(_) )); } From c708ebcf718301134f6f85c943b8807a6cdcceae Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 12 Jul 2024 17:26:31 -0700 Subject: [PATCH 2/3] Generalize VarULE impl --- utils/pattern/src/common.rs | 22 +++++-- utils/pattern/src/double.rs | 11 +++- utils/pattern/src/frontend/mod.rs | 67 +++++++++++++++++---- utils/pattern/src/frontend/zerovec.rs | 48 ++++++++++++++++ utils/pattern/src/implementations.rs | 83 --------------------------- utils/pattern/src/lib.rs | 2 - utils/pattern/src/multi_named.rs | 11 +++- utils/pattern/src/single.rs | 11 +++- 8 files changed, 148 insertions(+), 107 deletions(-) create mode 100644 utils/pattern/src/frontend/zerovec.rs delete mode 100644 utils/pattern/src/implementations.rs diff --git a/utils/pattern/src/common.rs b/utils/pattern/src/common.rs index 053ab9854d7..dc743c09164 100644 --- a/utils/pattern/src/common.rs +++ b/utils/pattern/src/common.rs @@ -75,7 +75,7 @@ pub trait PatternBackend: crate::private::Sealed + 'static { /// The type of error that the [`TryWriteable`] for this backend can return. type Error<'a>; - /// The type of error that the [`PatternBackend::try_store_from_utf8`] can return. + /// The type of error that the [`PatternBackend::validate_store_bytes`] can return. type StoreFromBytesError; /// The unsized type of the store required for this backend, usually `str` or `[u8]`. @@ -85,10 +85,24 @@ pub trait PatternBackend: crate::private::Sealed + 'static { #[doc(hidden)] // TODO(#4467): Should be internal type Iter<'a>: Iterator>>; - /// Converts a byte slice store to this pattern backend's store. - /// Does not perform validation of the store. + /// Validates that the bytes are valid in the store type, + /// but not that the store is valid for this pattern. #[doc(hidden)] // TODO(#4467): Should be internal - fn try_store_from_utf8(bytes: &[u8]) -> Result<&Self::Store, Self::StoreFromBytesError>; + fn validate_store_bytes(bytes: &[u8]) -> Result<(), Self::StoreFromBytesError>; + + /// # Safety + /// + /// [`PatternBackend::validate_store_bytes`] *must* return `Ok` for the given bytes. + #[doc(hidden)] // TODO(#4467): Should be internal + unsafe fn store_from_bytes(bytes: &[u8]) -> &Self::Store; + + // /// Converts a byte slice store to this pattern backend's store. + // /// + // /// # Safety + // /// + // /// The bytes must have been previously cast via [`try_store_from_utf8`]. + // #[doc(hidden)] // TODO(#4467): Should be internal + // unsafe fn try_store_from_utf8(bytes: &[u8]) -> &Self::Store; /// Checks a store for validity, returning an error if invalid. #[doc(hidden)] // TODO(#4467): Should be internal diff --git a/utils/pattern/src/double.rs b/utils/pattern/src/double.rs index f95033807eb..3060c8dbd59 100644 --- a/utils/pattern/src/double.rs +++ b/utils/pattern/src/double.rs @@ -276,8 +276,15 @@ impl PatternBackend for DoublePlaceholder { type Iter<'a> = DoublePlaceholderPatternIterator<'a>; #[inline] - fn try_store_from_utf8(bytes: &[u8]) -> Result<&Self::Store, Self::StoreFromBytesError> { - core::str::from_utf8(bytes) + fn validate_store_bytes(bytes: &[u8]) -> Result<(), Self::StoreFromBytesError> { + core::str::from_utf8(bytes).map(|_| ()) + } + + #[inline] + unsafe fn store_from_bytes(bytes: &[u8]) -> &Self::Store { + // The invariant of this function is that `bytes` is valid + // according to `validate_store_bytes` + core::str::from_utf8_unchecked(bytes) } fn validate_store(store: &Self::Store) -> Result<(), Error> { diff --git a/utils/pattern/src/frontend/mod.rs b/utils/pattern/src/frontend/mod.rs index 2d0df63c286..05bfd748289 100644 --- a/utils/pattern/src/frontend/mod.rs +++ b/utils/pattern/src/frontend/mod.rs @@ -6,6 +6,9 @@ mod databake; #[cfg(feature = "serde")] mod serde; +#[cfg(feature = "zerovec")] +mod zerovec; + use crate::common::*; use crate::Error; use crate::PatternOrUtf8Error; @@ -83,6 +86,7 @@ use writeable::{adapters::TryWriteableInfallibleAsWriteable, PartsWrite, TryWrit derive(zerofrom::ZeroFrom), zerofrom(may_borrow(Store)) )] +#[repr(transparent)] pub struct Pattern { _backend: PhantomData, store: Store, @@ -164,11 +168,32 @@ where } } -impl<'a, B> Pattern +impl Pattern where B: PatternBackend, { - /// Creates a pattern from its store encoded as UTF-8. + /// Creates a borrowed DST pattern from a borrowed store. + #[inline] + pub fn try_from_borrowed_store<'a>(store: &'a B::Store) -> Result<&'a Self, Error> { + B::validate_store(store)?; + Ok(Self::from_borrowed_store_unchecked(store)) + } + + /// Creates a borrowed DST pattern from a borrowed store. + /// + /// The store is expected to come from a valid `Pattern` with this `Backend`, + /// such as by calling [`Pattern::take_store()`]. If the store is not valid, + /// unexpected behavior may occur. + #[inline] + pub const fn from_borrowed_store_unchecked<'a>(store: &'a B::Store) -> &'a Self { + // Safety: `Pattern` is transparent over `B::Store` + unsafe { &*(store as *const B::Store as *const Pattern) } + } + + /// Creates a pattern from its store encoded as bytes. + /// + /// The bytes are checked to be a valid store in their entirety, + /// and the same reference is returned. /// /// # Examples /// @@ -176,18 +201,36 @@ where /// use icu_pattern::Pattern; /// use icu_pattern::SinglePlaceholder; /// - /// Pattern::::try_from_utf8_store(b"\x01 days") + /// Pattern::::try_from_bytes_store(b"\x01 days") /// .expect("single placeholder pattern"); /// ``` - pub fn try_from_utf8_store( - code_units: &'a [u8], - ) -> Result> { - let store = B::try_store_from_utf8(code_units).map_err(PatternOrUtf8Error::Utf8)?; - B::validate_store(store).map_err(PatternOrUtf8Error::Pattern)?; - Ok(Self { - _backend: PhantomData, - store, - }) + #[inline] + pub fn try_from_bytes_store<'a>( + store: &'a [u8], + ) -> Result<&'a Self, PatternOrUtf8Error> { + match B::validate_store_bytes(store) { + Ok(()) => (), + Err(e) => { + return Err(PatternOrUtf8Error::Utf8(e)); + } + } + // Safety: B::validate_store_bytes just succeeded + let store = unsafe { B::store_from_bytes(store) }; + Self::try_from_borrowed_store(store).map_err(PatternOrUtf8Error::Pattern) + } + + /// Creates a pattern from its store encoded as bytes. + /// + /// The same reference is returned. + /// + /// # Safety + /// + /// The bytes *must* be successfully convertible via [`Self::try_from_bytes_store`]. + #[inline] + pub unsafe fn from_bytes_store_unchecked<'a>(store: &'a [u8]) -> &'a Self { + // Safety: by this function invariant, we can call `B::store_from_bytes`, an unsafe fn + let store = B::store_from_bytes(store); + Self::from_borrowed_store_unchecked(store) } } diff --git a/utils/pattern/src/frontend/zerovec.rs b/utils/pattern/src/frontend/zerovec.rs new file mode 100644 index 00000000000..42b5a99bb66 --- /dev/null +++ b/utils/pattern/src/frontend/zerovec.rs @@ -0,0 +1,48 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::*; +use ::zerovec::{ule::VarULE, ZeroVecError}; + +/// Implement `VarULE` for `Pattern`. +/// +/// # Safety +/// +/// Note: `Pattern` is transparent over `B::Store`. +/// +/// Safety checklist for `ULE`: +/// +/// 1. Satisfied by `B::Store` being `VarULE` by the impl bound. +/// 2. Satisfied by `B::Store` being `VarULE` by the impl bound. +/// 3. The implementation of `validate_byte_slice()` returns an error +/// if any byte is not valid. +/// 4. The implementation of `validate_byte_slice()` returns an error +/// if the slice cannot be used to build a `Pattern` +/// in its entirety. +/// 5. The implementation of `from_byte_slice_unchecked()` returns a reference to the same data. +/// 6. All other methods *must* be left with their default impl. +/// 7. Satisfied by `B::Store` being `VarULE` by the impl bound. +unsafe impl VarULE for Pattern +where + B: PatternBackend, + B::Store: VarULE, +{ + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + match Self::try_from_bytes_store(bytes) { + Ok(_) => Ok(()), + Err(_) => Err(ZeroVecError::parse::()), + } + } + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + Self::from_bytes_store_unchecked(bytes) + } +} + +// TODO: +// impl<'a> ZeroMapKV<'a> for Pattern { +// type Container = VarZeroVec<'a, Pattern>; +// type Slice = VarZeroSlice>; +// type GetType = Pattern; +// type OwnedType = Box>; +// } diff --git a/utils/pattern/src/implementations.rs b/utils/pattern/src/implementations.rs deleted file mode 100644 index 692faf5b718..00000000000 --- a/utils/pattern/src/implementations.rs +++ /dev/null @@ -1,83 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use crate::{Pattern, SinglePlaceholder, SinglePlaceholderPattern}; - -use alloc::boxed::Box; -use zerovec::{maps::ZeroMapKV, ule::VarULE, VarZeroSlice, VarZeroVec, ZeroVecError}; - -impl<'a> ZeroMapKV<'a> for Pattern { - type Container = VarZeroVec<'a, Pattern>; - type Slice = VarZeroSlice>; - type GetType = Pattern; - type OwnedType = Box>; -} - -/// Implement `VarULE` for `Pattern`. -/// -/// # Safety -/// -/// Safety checklist for `ULE`: -/// -/// 1. `str` does not include any uninitialized or padding bytes. -/// 2. `str` is aligned to 1 byte. -/// 3. The implementation of `validate_byte_slice()` returns an error -/// if any byte is not valid. -/// 4. The implementation of `validate_byte_slice()` returns an error -/// if the slice cannot be used to build a `Pattern` -/// in its entirety. -/// 5. The implementation of `from_byte_slice_unchecked()` returns a reference to the same data. -/// 6. `parse_byte_slice()` is equivalent to `validate_byte_slice()` followed by `from_byte_slice_unchecked()`. -/// 7. `str` byte equality is semantic equality. -/// -/// # Examples -/// -/// ``` -/// use core::str::FromStr; -/// use icu_pattern::Pattern; -/// use icu_pattern::SinglePlaceholder; -/// use writeable::assert_writeable_eq; -/// use zerovec::ule::VarULE; -/// -/// // Create a pattern from a valid string: -/// let allocated_pattern = -/// Pattern::::from_str("{0} days") -/// .expect("valid pattern"); -/// -/// // Transform the store and create a new Pattern. This is valid because -/// // we call `.take_store()` and `.from_byte_slice_unchecked()` on patterns -/// // with the same backend (`SinglePlaceholder`). -/// let store = allocated_pattern.take_store(); -/// -/// assert!( -/// unsafe { -/// Pattern::::validate_byte_slice(store.as_bytes()).is_ok() -/// }); -/// -/// // SAFETY: store comes from a valid pattern -/// let borrowed_pattern: &Pattern = -/// unsafe { -/// Pattern::::from_byte_slice_unchecked(store.as_bytes()) -/// }; -/// -/// assert_writeable_eq!(borrowed_pattern.interpolate([5]), "5 days"); -/// ``` -unsafe impl VarULE for Pattern { - fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { - SinglePlaceholderPattern::try_from_utf8_store(bytes) - .map_err(|_| ZeroVecError::VarZeroVecFormatError)?; - Ok(()) - } - - unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { - // SAFETY: As `validate_byte_slice` succeeded, `try_from_utf8_store` succeeded, which implies valid UTF-8 - let store = core::str::from_utf8_unchecked(bytes); - - // SAFETY: - // As `validate_byte_slice` succeeded, this means `store` is valid. - // And the layout of `Pattern` is the same as `Store` - // because `_backend` is zero-sized and `store` is the only other field. - &*(store as *const str as *const Self) - } -} diff --git a/utils/pattern/src/lib.rs b/utils/pattern/src/lib.rs index 0094143d35c..292ec5d1293 100644 --- a/utils/pattern/src/lib.rs +++ b/utils/pattern/src/lib.rs @@ -55,8 +55,6 @@ mod common; mod double; mod error; mod frontend; -#[cfg(all(feature = "zerovec", feature = "alloc"))] -mod implementations; mod multi_named; #[cfg(feature = "alloc")] mod parser; diff --git a/utils/pattern/src/multi_named.rs b/utils/pattern/src/multi_named.rs index 5679b54f8ad..741ded59741 100644 --- a/utils/pattern/src/multi_named.rs +++ b/utils/pattern/src/multi_named.rs @@ -320,8 +320,15 @@ impl PatternBackend for MultiNamedPlaceholder { type Iter<'a> = MultiNamedPlaceholderPatternIterator<'a>; #[inline] - fn try_store_from_utf8(bytes: &[u8]) -> Result<&Self::Store, Self::StoreFromBytesError> { - core::str::from_utf8(bytes) + fn validate_store_bytes(bytes: &[u8]) -> Result<(), Self::StoreFromBytesError> { + core::str::from_utf8(bytes).map(|_| ()) + } + + #[inline] + unsafe fn store_from_bytes(bytes: &[u8]) -> &Self::Store { + // The invariant of this function is that `bytes` is valid + // according to `validate_store_bytes` + core::str::from_utf8_unchecked(bytes) } fn validate_store(store: &Self::Store) -> Result<(), Error> { diff --git a/utils/pattern/src/single.rs b/utils/pattern/src/single.rs index f7820d4f11d..8e903ebfb34 100644 --- a/utils/pattern/src/single.rs +++ b/utils/pattern/src/single.rs @@ -177,8 +177,15 @@ impl PatternBackend for SinglePlaceholder { type Iter<'a> = SinglePlaceholderPatternIterator<'a>; #[inline] - fn try_store_from_utf8(utf8: &[u8]) -> Result<&Self::Store, Self::StoreFromBytesError> { - core::str::from_utf8(utf8) + fn validate_store_bytes(bytes: &[u8]) -> Result<(), Self::StoreFromBytesError> { + core::str::from_utf8(bytes).map(|_| ()) + } + + #[inline] + unsafe fn store_from_bytes(bytes: &[u8]) -> &Self::Store { + // The invariant of this function is that `bytes` is valid + // according to `validate_store_bytes` + core::str::from_utf8_unchecked(bytes) } fn validate_store(store: &Self::Store) -> Result<(), Error> { From 394ceadfabb5aa819021824e09394a10c9269af0 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 12 Jul 2024 17:26:43 -0700 Subject: [PATCH 3/3] clippy --- utils/pattern/src/frontend/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/utils/pattern/src/frontend/mod.rs b/utils/pattern/src/frontend/mod.rs index 05bfd748289..ecabfa1b747 100644 --- a/utils/pattern/src/frontend/mod.rs +++ b/utils/pattern/src/frontend/mod.rs @@ -174,7 +174,7 @@ where { /// Creates a borrowed DST pattern from a borrowed store. #[inline] - pub fn try_from_borrowed_store<'a>(store: &'a B::Store) -> Result<&'a Self, Error> { + pub fn try_from_borrowed_store(store: &B::Store) -> Result<&Self, Error> { B::validate_store(store)?; Ok(Self::from_borrowed_store_unchecked(store)) } @@ -185,7 +185,7 @@ where /// such as by calling [`Pattern::take_store()`]. If the store is not valid, /// unexpected behavior may occur. #[inline] - pub const fn from_borrowed_store_unchecked<'a>(store: &'a B::Store) -> &'a Self { + pub const fn from_borrowed_store_unchecked(store: &B::Store) -> &Self { // Safety: `Pattern` is transparent over `B::Store` unsafe { &*(store as *const B::Store as *const Pattern) } } @@ -205,9 +205,9 @@ where /// .expect("single placeholder pattern"); /// ``` #[inline] - pub fn try_from_bytes_store<'a>( - store: &'a [u8], - ) -> Result<&'a Self, PatternOrUtf8Error> { + pub fn try_from_bytes_store( + store: &[u8], + ) -> Result<&Self, PatternOrUtf8Error> { match B::validate_store_bytes(store) { Ok(()) => (), Err(e) => { @@ -227,7 +227,7 @@ where /// /// The bytes *must* be successfully convertible via [`Self::try_from_bytes_store`]. #[inline] - pub unsafe fn from_bytes_store_unchecked<'a>(store: &'a [u8]) -> &'a Self { + pub unsafe fn from_bytes_store_unchecked(store: &[u8]) -> &Self { // Safety: by this function invariant, we can call `B::store_from_bytes`, an unsafe fn let store = B::store_from_bytes(store); Self::from_borrowed_store_unchecked(store)