Skip to content

Improve and add more impls in Pattern crate #5241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions utils/pattern/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ pub trait PatternBackend: crate::private::Sealed + 'static {
/// The type of error that the [`TryWriteable`] for this backend can return.
type Error<'a>;

/// The type of error that the [`PatternBackend::try_store_from_utf8`] can return.
/// The type of error that the [`PatternBackend::validate_store_bytes`] can return.
type StoreFromBytesError;

/// The unsized type of the store required for this backend, usually `str` or `[u8]`.
Expand All @@ -85,10 +85,24 @@ pub trait PatternBackend: crate::private::Sealed + 'static {
#[doc(hidden)] // TODO(#4467): Should be internal
type Iter<'a>: Iterator<Item = PatternItem<'a, Self::PlaceholderKey<'a>>>;

/// Converts a byte slice store to this pattern backend's store.
/// Does not perform validation of the store.
/// Validates that the bytes are valid in the store type,
/// but not that the store is valid for this pattern.
#[doc(hidden)] // TODO(#4467): Should be internal
fn try_store_from_utf8(bytes: &[u8]) -> Result<&Self::Store, Self::StoreFromBytesError>;
fn validate_store_bytes(bytes: &[u8]) -> Result<(), Self::StoreFromBytesError>;

/// # Safety
///
/// [`PatternBackend::validate_store_bytes`] *must* return `Ok` for the given bytes.
#[doc(hidden)] // TODO(#4467): Should be internal
unsafe fn store_from_bytes(bytes: &[u8]) -> &Self::Store;

// /// Converts a byte slice store to this pattern backend's store.
// ///
// /// # Safety
// ///
// /// The bytes must have been previously cast via [`try_store_from_utf8`].
// #[doc(hidden)] // TODO(#4467): Should be internal
// unsafe fn try_store_from_utf8(bytes: &[u8]) -> &Self::Store;

/// Checks a store for validity, returning an error if invalid.
#[doc(hidden)] // TODO(#4467): Should be internal
Expand Down
11 changes: 9 additions & 2 deletions utils/pattern/src/double.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,15 @@ impl PatternBackend for DoublePlaceholder {
type Iter<'a> = DoublePlaceholderPatternIterator<'a>;

#[inline]
fn try_store_from_utf8(bytes: &[u8]) -> Result<&Self::Store, Self::StoreFromBytesError> {
core::str::from_utf8(bytes)
fn validate_store_bytes(bytes: &[u8]) -> Result<(), Self::StoreFromBytesError> {
core::str::from_utf8(bytes).map(|_| ())
}

#[inline]
unsafe fn store_from_bytes(bytes: &[u8]) -> &Self::Store {
// The invariant of this function is that `bytes` is valid
// according to `validate_store_bytes`
core::str::from_utf8_unchecked(bytes)
}

fn validate_store(store: &Self::Store) -> Result<(), Error> {
Expand Down
67 changes: 55 additions & 12 deletions utils/pattern/src/frontend/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
mod databake;
#[cfg(feature = "serde")]
mod serde;
#[cfg(feature = "zerovec")]
mod zerovec;

use crate::common::*;
use crate::Error;
use crate::PatternOrUtf8Error;
Expand Down Expand Up @@ -83,6 +86,7 @@ use writeable::{adapters::TryWriteableInfallibleAsWriteable, PartsWrite, TryWrit
derive(zerofrom::ZeroFrom),
zerofrom(may_borrow(Store))
)]
#[repr(transparent)]
pub struct Pattern<Backend, Store: ?Sized> {
_backend: PhantomData<Backend>,
store: Store,
Expand Down Expand Up @@ -164,30 +168,69 @@ where
}
}

impl<'a, B> Pattern<B, &'a B::Store>
impl<B> Pattern<B, B::Store>
where
B: PatternBackend,
{
/// Creates a pattern from its store encoded as UTF-8.
/// Creates a borrowed DST pattern from a borrowed store.
#[inline]
pub fn try_from_borrowed_store(store: &B::Store) -> Result<&Self, Error> {
B::validate_store(store)?;
Ok(Self::from_borrowed_store_unchecked(store))
}

/// Creates a borrowed DST pattern from a borrowed store.
///
/// The store is expected to come from a valid `Pattern` with this `Backend`,
/// such as by calling [`Pattern::take_store()`]. If the store is not valid,
/// unexpected behavior may occur.
#[inline]
pub const fn from_borrowed_store_unchecked(store: &B::Store) -> &Self {
// Safety: `Pattern<B, B::Store>` is transparent over `B::Store`
unsafe { &*(store as *const B::Store as *const Pattern<B, B::Store>) }
}

/// Creates a pattern from its store encoded as bytes.
///
/// The bytes are checked to be a valid store in their entirety,
/// and the same reference is returned.
///
/// # Examples
///
/// ```
/// use icu_pattern::Pattern;
/// use icu_pattern::SinglePlaceholder;
///
/// Pattern::<SinglePlaceholder, _>::try_from_utf8_store(b"\x01 days")
/// Pattern::<SinglePlaceholder, _>::try_from_bytes_store(b"\x01 days")
/// .expect("single placeholder pattern");
/// ```
pub fn try_from_utf8_store(
code_units: &'a [u8],
) -> Result<Self, PatternOrUtf8Error<B::StoreFromBytesError>> {
let store = B::try_store_from_utf8(code_units).map_err(PatternOrUtf8Error::Utf8)?;
B::validate_store(store).map_err(PatternOrUtf8Error::Pattern)?;
Ok(Self {
_backend: PhantomData,
store,
})
#[inline]
pub fn try_from_bytes_store(
store: &[u8],
) -> Result<&Self, PatternOrUtf8Error<B::StoreFromBytesError>> {
match B::validate_store_bytes(store) {
Ok(()) => (),
Err(e) => {
return Err(PatternOrUtf8Error::Utf8(e));
}
}
// Safety: B::validate_store_bytes just succeeded
let store = unsafe { B::store_from_bytes(store) };
Self::try_from_borrowed_store(store).map_err(PatternOrUtf8Error::Pattern)
}

/// Creates a pattern from its store encoded as bytes.
///
/// The same reference is returned.
///
/// # Safety
///
/// The bytes *must* be successfully convertible via [`Self::try_from_bytes_store`].
#[inline]
pub unsafe fn from_bytes_store_unchecked(store: &[u8]) -> &Self {
// Safety: by this function invariant, we can call `B::store_from_bytes`, an unsafe fn
let store = B::store_from_bytes(store);
Self::from_borrowed_store_unchecked(store)
}
}

Expand Down
43 changes: 26 additions & 17 deletions utils/pattern/src/frontend/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,34 +78,43 @@ mod tests {
use super::*;
use crate::SinglePlaceholderPattern;

#[derive(Debug, PartialEq, Deserialize, Serialize)]
struct TestDerive<'a> {
#[serde(borrow)]
pub pattern_cow: SinglePlaceholderPattern<Cow<'a, str>>,
}

#[test]
fn test_json() {
let pattern_owned = SinglePlaceholderPattern::from_str("Hello, {0}!").unwrap();
let pattern_cow: SinglePlaceholderPattern<Cow<str>> =
SinglePlaceholderPattern::from_store_unchecked(Cow::Owned(pattern_owned.take_store()));
let pattern_json = serde_json::to_string(&pattern_cow).unwrap();
let pattern_obj = TestDerive { pattern_cow };
let pattern_json = serde_json::to_string(&pattern_obj).unwrap();
assert_eq!(
pattern_json,
r#"[{"Literal":"Hello, "},{"Placeholder":"Singleton"},{"Literal":"!"}]"#
r#"{"pattern_cow":[{"Literal":"Hello, "},{"Placeholder":"Singleton"},{"Literal":"!"}]}"#
);
let pattern_deserialized: SinglePlaceholderPattern<Cow<str>> =
serde_json::from_str(&pattern_json).unwrap();
assert_eq!(pattern_cow, pattern_deserialized);
assert!(matches!(pattern_deserialized.take_store(), Cow::Owned(_)));
let pattern_deserialized: TestDerive = serde_json::from_str(&pattern_json).unwrap();
assert_eq!(pattern_obj, pattern_deserialized);
assert!(matches!(
pattern_deserialized.pattern_cow.take_store(),
Cow::Owned(_)
));
}

#[test]
fn test_postcard() {
let pattern_owned = SinglePlaceholderPattern::from_str("Hello, {0}!").unwrap();
let pattern_cow: SinglePlaceholderPattern<Cow<str>> =
SinglePlaceholderPattern::from_store_unchecked(Cow::Owned(pattern_owned.take_store()));
let pattern_postcard = postcard::to_stdvec(&pattern_cow).unwrap();
let pattern_obj = TestDerive { pattern_cow };
let pattern_postcard = postcard::to_stdvec(&pattern_obj).unwrap();
assert_eq!(pattern_postcard, b"\x09\x08Hello, !");
let pattern_deserialized: SinglePlaceholderPattern<Cow<str>> =
postcard::from_bytes(&pattern_postcard).unwrap();
assert_eq!(pattern_cow, pattern_deserialized);
let pattern_deserialized: TestDerive = postcard::from_bytes(&pattern_postcard).unwrap();
assert_eq!(pattern_obj, pattern_deserialized);
assert!(matches!(
pattern_deserialized.take_store(),
pattern_deserialized.pattern_cow.take_store(),
Cow::Borrowed(_)
));
}
Expand All @@ -115,13 +124,13 @@ mod tests {
let pattern_owned = SinglePlaceholderPattern::from_str("Hello, {0}!").unwrap();
let pattern_cow: SinglePlaceholderPattern<Cow<str>> =
SinglePlaceholderPattern::from_store_unchecked(Cow::Owned(pattern_owned.take_store()));
let pattern_rmp = rmp_serde::to_vec(&pattern_cow).unwrap();
assert_eq!(pattern_rmp, b"\xA9\x08Hello, !");
let pattern_deserialized: SinglePlaceholderPattern<Cow<str>> =
rmp_serde::from_slice(&pattern_rmp).unwrap();
assert_eq!(pattern_cow, pattern_deserialized);
let pattern_obj = TestDerive { pattern_cow };
let pattern_rmp = rmp_serde::to_vec(&pattern_obj).unwrap();
assert_eq!(pattern_rmp, b"\x91\xA9\x08Hello, !");
let pattern_deserialized: TestDerive = rmp_serde::from_slice(&pattern_rmp).unwrap();
assert_eq!(pattern_obj, pattern_deserialized);
assert!(matches!(
pattern_deserialized.take_store(),
pattern_deserialized.pattern_cow.take_store(),
Cow::Borrowed(_)
));
}
Expand Down
48 changes: 48 additions & 0 deletions utils/pattern/src/frontend/zerovec.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use super::*;
use ::zerovec::{ule::VarULE, ZeroVecError};

/// Implement `VarULE` for `Pattern`.
///
/// # Safety
///
/// Note: `Pattern<B, B::Store>` is transparent over `B::Store`.
///
/// Safety checklist for `ULE`:
///
/// 1. Satisfied by `B::Store` being `VarULE` by the impl bound.
/// 2. Satisfied by `B::Store` being `VarULE` by the impl bound.
/// 3. The implementation of `validate_byte_slice()` returns an error
/// if any byte is not valid.
/// 4. The implementation of `validate_byte_slice()` returns an error
/// if the slice cannot be used to build a `Pattern<B, B::Store>`
/// in its entirety.
/// 5. The implementation of `from_byte_slice_unchecked()` returns a reference to the same data.
/// 6. All other methods *must* be left with their default impl.
/// 7. Satisfied by `B::Store` being `VarULE` by the impl bound.
unsafe impl<B> VarULE for Pattern<B, B::Store>
where
B: PatternBackend,
B::Store: VarULE,
{
fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
match Self::try_from_bytes_store(bytes) {
Ok(_) => Ok(()),
Err(_) => Err(ZeroVecError::parse::<Self>()),
}
}
unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
Self::from_bytes_store_unchecked(bytes)
}
}

// TODO:
// impl<'a> ZeroMapKV<'a> for Pattern<SinglePlaceholder, str> {
// type Container = VarZeroVec<'a, Pattern<SinglePlaceholder, str>>;
// type Slice = VarZeroSlice<Pattern<SinglePlaceholder, str>>;
// type GetType = Pattern<SinglePlaceholder, str>;
// type OwnedType = Box<Pattern<SinglePlaceholder, str>>;
// }
83 changes: 0 additions & 83 deletions utils/pattern/src/implementations.rs

This file was deleted.

2 changes: 0 additions & 2 deletions utils/pattern/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ mod common;
mod double;
mod error;
mod frontend;
#[cfg(all(feature = "zerovec", feature = "alloc"))]
mod implementations;
mod multi_named;
#[cfg(feature = "alloc")]
mod parser;
Expand Down
11 changes: 9 additions & 2 deletions utils/pattern/src/multi_named.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,8 +320,15 @@ impl PatternBackend for MultiNamedPlaceholder {
type Iter<'a> = MultiNamedPlaceholderPatternIterator<'a>;

#[inline]
fn try_store_from_utf8(bytes: &[u8]) -> Result<&Self::Store, Self::StoreFromBytesError> {
core::str::from_utf8(bytes)
fn validate_store_bytes(bytes: &[u8]) -> Result<(), Self::StoreFromBytesError> {
core::str::from_utf8(bytes).map(|_| ())
}

#[inline]
unsafe fn store_from_bytes(bytes: &[u8]) -> &Self::Store {
// The invariant of this function is that `bytes` is valid
// according to `validate_store_bytes`
core::str::from_utf8_unchecked(bytes)
}

fn validate_store(store: &Self::Store) -> Result<(), Error> {
Expand Down
Loading
Loading