Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions provider/blob/src/blob_data_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::blob_schema::BlobBoundLocaleSchema;
use crate::blob_schema::BlobSchema;
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
use icu_provider::buf::BufferFormat;
use icu_provider::prelude::*;
use icu_provider::unstable::BindLocale;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you start depending on unstable APIs, do we need to use a `~` (tilde) version requirement for the dependency?

use icu_provider::unstable::BindLocaleResponse;
use icu_provider::unstable::BoundLocaleDataProvider;
use icu_provider::unstable::DataAttributesRequest;
use icu_provider::Cart;
use icu_provider::DynamicDryDataProvider;
use yoke::*;
Expand Down Expand Up @@ -166,6 +171,91 @@ impl IterableDynamicDataProvider<BufferMarker> for BlobDataProvider {
}
}

/// A provider derived from a [`BlobDataProvider`] that returns data for a
/// specific marker and locale.
///
/// Values of this type are obtained from [`BindLocale::bind_locale`] on a
/// [`BlobDataProvider`]. The remaining data marker attributes are supplied
/// later through [`BoundLocaleDataProvider::load_bound`].
///
/// # Examples
///
/// ```
/// use icu_locale_core::locale;
/// use icu_provider::prelude::*;
/// use icu_provider::buf::DeserializingBufferProvider;
/// use icu_provider::hello_world::HelloWorldV1;
/// use icu_provider::hello_world::HelloWorldFormatterPreferences;
/// use icu_provider::unstable::BindLocale;
/// use icu_provider::unstable::BoundLocaleDataProvider;
/// use icu_provider::unstable::DataAttributesRequest;
/// use icu_provider_blob::BlobDataProvider;
/// use writeable::assert_writeable_eq;
///
/// // Read an ICU4X data blob statically:
/// const HELLO_WORLD_BLOB: &[u8] = include_bytes!("../tests/data/v3.postcard");
///
/// // Create a DataProvider from it:
/// let provider = BlobDataProvider::try_new_from_static_blob(HELLO_WORLD_BLOB)
/// .expect("Deserialization should succeed");
///
/// // Bind a specific marker and locale:
/// let locale = HelloWorldV1::INFO.make_locale(HelloWorldFormatterPreferences::from(locale!("en")).locale_preferences);
/// let bound_provider = provider.bind_locale(HelloWorldV1::INFO, DataRequest {
/// metadata: Default::default(),
/// id: DataIdentifierBorrowed::for_locale(&locale)
/// }).unwrap().bound_provider;
///
/// // Now load a specific attribute:
/// let response = BoundLocaleDataProvider::<HelloWorldV1>::load_bound(
/// &DeserializingBufferProvider::new(&bound_provider),
/// DataAttributesRequest {
/// marker_attributes: DataMarkerAttributes::try_from_str("reverse").unwrap(),
/// metadata: Default::default()
/// }
/// ).unwrap();
///
/// assert_writeable_eq!(response.payload.get().message, "Olleh Dlrow");
/// ```
#[derive(Debug)]
pub struct BlobBoundLocaleDataProvider {
/// Schema view already narrowed to one marker and locale, paired with the
/// optional cart backing the borrowed data.
pub(crate) data: Yoke<BlobBoundLocaleSchema<'static>, Option<Cart>>,
}

impl BindLocale<BufferMarker> for BlobDataProvider {
    type BoundLocaleDataProvider = BlobBoundLocaleDataProvider;

    /// Binds this provider to `marker` and the locale carried by `req`.
    ///
    /// The response metadata reports the Postcard buffer format and the
    /// checksum returned by the schema; the bound provider holds the schema
    /// view narrowed to that marker and locale.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the schema's `bind_locale`.
    fn bind_locale(
        &self,
        marker: DataMarkerInfo,
        req: DataRequest,
    ) -> Result<BindLocaleResponse<Self::BoundLocaleDataProvider>, DataError> {
        let bound_with_checksum: Yoke<(BlobBoundLocaleSchema, Option<u64>), Option<Cart>> = self
            .data
            .try_map_project_cloned(|schema, _| schema.bind_locale(marker, req))?;

        // Read the checksum out before the tuple is projected down to the schema alone.
        let checksum = bound_with_checksum.get().1;
        let data = bound_with_checksum.map_project(|(bound_schema, _), _| bound_schema);

        let mut metadata = DataResponseMetadata::default();
        metadata.checksum = checksum;
        metadata.buffer_format = Some(BufferFormat::Postcard1);

        Ok(BindLocaleResponse {
            metadata,
            bound_provider: BlobBoundLocaleDataProvider { data },
        })
    }
}

impl BoundLocaleDataProvider<BufferMarker> for BlobBoundLocaleDataProvider {
    /// Loads the raw buffer for the attributes in `req` from the bound locale.
    ///
    /// The response metadata reports the Postcard buffer format.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the bound schema's `load`.
    fn load_bound(
        &self,
        req: DataAttributesRequest,
    ) -> Result<DataResponse<BufferMarker>, DataError> {
        // Note: the checksum is returned by `bind_locale()` instead of `load_bound()`
        let mut metadata = DataResponseMetadata::default();
        metadata.buffer_format = Some(BufferFormat::Postcard1);

        let buffer: Yoke<&[u8], Option<Cart>> = self
            .data
            .try_map_project_cloned(|bound_schema, _| bound_schema.load(req))?;

        Ok(DataResponse {
            metadata,
            payload: DataPayload::from_yoked_buffer(buffer),
        })
    }
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
127 changes: 105 additions & 22 deletions provider/blob/src/blob_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use core::fmt::Write;
use icu_provider::{marker::DataMarkerIdHash, prelude::*};
use icu_provider::marker::DataMarkerIdHash;
use icu_provider::prelude::*;
use icu_provider::unstable::DataAttributesRequest;
use serde::Deserialize;
use writeable::Writeable;
use zerotrie::ZeroTrieSimpleAscii;
use zerotrie::{cursor::ZeroTrieSimpleAsciiCursor, ZeroTrieSimpleAscii};
use zerovec::vecs::{Index16, Index32, VarZeroSlice, VarZeroVecFormat, ZeroSlice};

/// A versioned Serde schema for ICU4X data blobs.
Expand Down Expand Up @@ -52,6 +54,20 @@ impl<'data> BlobSchema<'data> {
}
}

/// Binds the blob to `marker` and the locale in `req`, dispatching on the
/// schema version.
///
/// Returns the bound schema together with an optional checksum.
///
/// # Panics
///
/// Panics if the blob uses the `V001`, `V002` or `V002Bigger` schema, which
/// this function treats as unreachable.
pub(crate) fn bind_locale(
    &self,
    marker: DataMarkerInfo,
    req: DataRequest,
) -> Result<(BlobBoundLocaleSchema<'data>, Option<u64>), DataError> {
    match self {
        BlobSchema::V003(schema) => schema.bind_locale(marker, req),
        BlobSchema::V003Bigger(schema) => schema.bind_locale(marker, req),
        BlobSchema::V001(..) | BlobSchema::V002(..) | BlobSchema::V002Bigger(..) => {
            unreachable!("Unreachable blob schema")
        }
    }
}

#[cfg(feature = "alloc")]
pub fn iter_ids(
&self,
Expand Down Expand Up @@ -125,15 +141,28 @@ impl<LocaleVecFormat: VarZeroVecFormat> Default for BlobSchemaV1<'_, LocaleVecFo
}
}

/// Resolves `marker_attributes` against an attributes trie and returns the
/// value stored for the matching entry, if any.
///
/// The attributes are written into `cursor`, so the cursor must already be
/// positioned where the attributes portion of the key begins.
///
/// When `metadata.attributes_prefix_match` is unset, only an exact match
/// yields a value. When it is set and the exact lookup has no value, the
/// cursor keeps stepping along the first edge until a value is found.
///
/// Returns `None` when no entry matches.
fn load_attributes(
    mut cursor: ZeroTrieSimpleAsciiCursor,
    marker_attributes: &DataMarkerAttributes,
    metadata: DataRequestMetadata,
) -> Option<usize> {
    let _infallible_ascii = marker_attributes.write_to(&mut cursor);
    loop {
        if let Some(index) = cursor.take_value() {
            break Some(index);
        }
        // Match the shortest attribute sharing a prefix by stepping along the
        // first edge. `probe` yields `None` once there is no edge left to
        // follow (for example when the requested attributes are not in the
        // trie at all); stop there, otherwise this loop would never terminate.
        if !metadata.attributes_prefix_match || cursor.probe(0).is_none() {
            break None;
        }
    }
}

impl<'data, LocaleVecFormat: VarZeroVecFormat> BlobSchemaV1<'data, LocaleVecFormat> {
pub fn load(
pub(crate) fn get_trie(
&self,
marker: DataMarkerInfo,
req: DataRequest,
) -> Result<(&'data [u8], Option<u64>), DataError> {
if marker.is_singleton && !req.id.locale.is_unknown() {
return Err(DataErrorKind::InvalidRequest.with_req(marker, req));
}
) -> Result<ZeroTrieSimpleAscii<&'data [u8]>, DataError> {
let marker_index = self
.markers
.binary_search(&marker.id.hashed())
Expand All @@ -143,22 +172,23 @@ impl<'data, LocaleVecFormat: VarZeroVecFormat> BlobSchemaV1<'data, LocaleVecForm
.locales
.get(marker_index)
.ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(marker, req))?;
let mut cursor = ZeroTrieSimpleAscii::from_store(zerotrie).into_cursor();
Ok(ZeroTrieSimpleAscii::from_store(zerotrie))
}

pub(crate) fn load(
&self,
marker: DataMarkerInfo,
req: DataRequest,
) -> Result<(&'data [u8], Option<u64>), DataError> {
if marker.is_singleton && !req.id.locale.is_unknown() {
return Err(DataErrorKind::InvalidRequest.with_req(marker, req));
}
let zerotrie = self.get_trie(marker, req)?;
let mut cursor = zerotrie.into_cursor();
let _infallible_ascii = req.id.locale.write_to(&mut cursor);
let blob_index = if !req.id.marker_attributes.is_empty() {
let _infallible_ascii = cursor.write_char(REQUEST_SEPARATOR);
req.id
.marker_attributes
.write_to(&mut cursor)
.map_err(|_| DataErrorKind::IdentifierNotFound.with_req(marker, req))?;
loop {
if let Some(v) = cursor.take_value() {
break Some(v);
}
if !req.metadata.attributes_prefix_match || cursor.probe(0).is_none() {
break None;
}
}
load_attributes(cursor, req.id.marker_attributes, req.metadata)
} else {
cursor.take_value()
}
Expand All @@ -176,8 +206,36 @@ impl<'data, LocaleVecFormat: VarZeroVecFormat> BlobSchemaV1<'data, LocaleVecForm
))
}

fn get_checksum(&self, zerotrie: &[u8]) -> Option<u64> {
ZeroTrieSimpleAscii::from_store(zerotrie)
/// Narrows this schema to the entries for `marker` and the locale in `req`.
///
/// Returns the bound schema together with the checksum, which is looked up
/// only when `marker.has_checksum` is set.
///
/// # Errors
///
/// - `InvalidRequest` if the marker is a singleton or the locale is unknown.
/// - `IdentifierNotFound` if the trie has nothing left after the locale and
///   the request separator have been consumed.
/// - Any error returned by `get_trie`.
pub(crate) fn bind_locale(
    &self,
    marker: DataMarkerInfo,
    req: DataRequest,
) -> Result<(BlobBoundLocaleSchema<'data>, Option<u64>), DataError> {
    // Note: singleton markers do not make sense with this function
    if marker.is_singleton || req.id.locale.is_unknown() {
        return Err(DataErrorKind::InvalidRequest.with_req(marker, req));
    }
    let marker_trie = self.get_trie(marker, req)?;

    // Consume the locale and the separator; whatever remains under the cursor
    // is the sub-trie keyed by attributes.
    let mut locale_cursor = marker_trie.into_cursor();
    let _infallible_ascii = req.id.locale.write_to(&mut locale_cursor);
    let _infallible_ascii = locale_cursor.write_char(REQUEST_SEPARATOR);
    if locale_cursor.is_empty() {
        return Err(DataErrorKind::IdentifierNotFound.with_req(marker, req));
    }

    let bound = BlobBoundLocaleSchema {
        attributes_trie: locale_cursor.into_suffix_trie(),
        buffers: self.buffers,
    };
    let checksum = if marker.has_checksum {
        self.get_checksum(marker_trie)
    } else {
        None
    };
    Ok((bound, checksum))
}

/// Reads the checksum stored under `CHECKSUM_KEY` in `zerotrie`.
///
/// Returns `None` if the key is absent, if its value does not index a buffer,
/// or if that buffer is not exactly eight bytes long. The bytes are decoded
/// as a little-endian `u64`.
fn get_checksum(&self, zerotrie: ZeroTrieSimpleAscii<&'data [u8]>) -> Option<u64> {
    let checksum_index = zerotrie.get(CHECKSUM_KEY)?;
    let checksum_bytes = self.buffers.get(checksum_index)?;
    let le_bytes: [u8; 8] = checksum_bytes.try_into().ok()?;
    Some(u64::from_le_bytes(le_bytes))
}
Expand Down Expand Up @@ -241,3 +299,28 @@ impl<'data, LocaleVecFormat: VarZeroVecFormat> BlobSchemaV1<'data, LocaleVecForm
debug_assert!(seen_max);
}
}

/// Blob data narrowed to a single marker and locale.
///
/// Built by `BlobSchemaV1::bind_locale`, which advances a trie cursor past the
/// locale and the request separator and keeps the remaining suffix trie.
#[derive(Clone, Copy, Debug, yoke::Yokeable)]
pub(crate) struct BlobBoundLocaleSchema<'data> {
/// Trie queried with data marker attributes; its values are used as indices
/// into `buffers`.
attributes_trie: ZeroTrieSimpleAscii<&'data [u8]>,
/// Data buffers, looked up by the index found in `attributes_trie`.
buffers: &'data VarZeroSlice<[u8], Index32>,
}

impl<'data> BlobBoundLocaleSchema<'data> {
    /// Returns the buffer stored for the attributes in `req`.
    ///
    /// # Errors
    ///
    /// - `IdentifierNotFound` if the attributes do not resolve to an entry in
    ///   the attributes trie.
    /// - A custom "Invalid blob bytes" error if the resolved index has no
    ///   corresponding buffer.
    pub(crate) fn load(&self, req: DataAttributesRequest) -> Result<&'data [u8], DataError> {
        let attributes_cursor = self.attributes_trie.cursor();
        let blob_index =
            match load_attributes(attributes_cursor, req.marker_attributes, req.metadata) {
                Some(index) => index,
                None => {
                    return Err(DataErrorKind::IdentifierNotFound
                        .into_error()
                        .with_debug_context(req.marker_attributes));
                }
            };
        self.buffers.get(blob_index).ok_or_else(|| {
            DataError::custom("Invalid blob bytes").with_debug_context(req.marker_attributes)
        })
    }
}
5 changes: 5 additions & 0 deletions provider/blob/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,8 @@ mod blob_schema;
pub mod export;

pub use blob_data_provider::BlobDataProvider;

/// Additional types for mostly internal usage.
///
/// Currently re-exports `BlobBoundLocaleDataProvider`, the provider type
/// produced by binding a [`BlobDataProvider`] to a marker and locale.
pub mod unstable {
pub use super::blob_data_provider::BlobBoundLocaleDataProvider;
}
43 changes: 43 additions & 0 deletions provider/core/src/buf/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use crate::buf::BufferFormat;
use crate::buf::BufferProvider;
use crate::data_provider::DynamicDryDataProvider;
use crate::prelude::*;
use crate::unstable::BoundLocaleDataProvider;
use crate::DryDataProvider;
use serde::de::Deserialize;
use yoke::Yokeable;
Expand Down Expand Up @@ -59,6 +60,22 @@ where
}
}

impl<'a, P> DeserializingBufferProvider<'a, P> {
/// Wraps the given provider in a [`DeserializingBufferProvider`].
///
/// This requires enabling the deserialization Cargo feature
/// for the expected format(s):
///
/// - `deserialize_json`
/// - `deserialize_postcard_1`
/// - `deserialize_bincode_1`
///
/// ✨ *Enabled with the `serde` Cargo feature.*
pub fn new(inner: &'a P) -> Self {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The canonical way to wrap a provider in a `DeserializingBufferProvider` is to call `.as_deserializing()`. We use this in a lot of docs, and I don't think we should introduce an alternative API that does the same thing.

Self(inner)
}
}

fn deserialize_impl<'data, M>(
// Allow `bytes` to be unused in case all buffer formats are disabled
#[allow(unused_variables)] bytes: &'data [u8],
Expand Down Expand Up @@ -230,6 +247,32 @@ where
}
}

impl<P, M> BoundLocaleDataProvider<M> for DeserializingBufferProvider<'_, P>
where
    M: DynamicDataMarker,
    P: BoundLocaleDataProvider<BufferMarker> + ?Sized,
    for<'de> <M::DataStruct as Yokeable<'de>>::Output: Deserialize<'de>,
{
    /// Loads a buffer from the wrapped provider and deserializes it into `M`.
    ///
    /// The metadata of the buffer response is passed through unchanged.
    ///
    /// # Errors
    ///
    /// - Any error from the wrapped provider's `load_bound`.
    /// - `Deserialize` if the buffer response does not state its buffer format.
    /// - Any deserialization error, annotated with the request as debug context.
    fn load_bound(
        &self,
        req: crate::request::DataAttributesRequest,
    ) -> Result<DataResponse<M>, DataError> {
        let raw = self.0.load_bound(req)?;

        // The format must be known before the bytes can be interpreted.
        let format = match raw.metadata.buffer_format {
            Some(format) => format,
            None => {
                return Err(DataErrorKind::Deserialize
                    .with_str_context("BufferProvider didn't set BufferFormat")
                    .with_debug_context(&req));
            }
        };

        let payload = raw
            .payload
            .into_deserialized(format)
            .map_err(|err| err.with_debug_context(&req))?;
        Ok(DataResponse {
            metadata: raw.metadata,
            payload,
        })
    }
}

#[cfg(feature = "deserialize_json")]
impl From<serde_json::error::Error> for DataError {
fn from(e: serde_json::error::Error) -> Self {
Expand Down
Loading