diff --git a/utils/ixdtf/src/core.rs b/utils/ixdtf/src/core.rs index b60d0965027..110c8fed344 100644 --- a/utils/ixdtf/src/core.rs +++ b/utils/ixdtf/src/core.rs @@ -4,82 +4,9 @@ //! Core functionality for `ixdtf`'s parsers +use crate::encoding::EncodingType; use crate::{ParseError, ParserResult}; -mod private { - pub trait Sealed {} -} - -/// A trait for defining various supported encodings -/// and implementing functionality that is encoding -/// sensitive / specific. -pub trait EncodingType: private::Sealed { - type CodeUnit: PartialEq + core::fmt::Debug + Clone; - - /// Get a slice from the underlying source using for start..end - fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]>; - - /// Retrieve the provided code unit index and returns the value as an ASCII byte - /// or None if the value is not ASCII representable. - fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult>; - - /// Checks for the known calendar annotation key `u-ca`. - fn check_calendar_key(key: &[Self::CodeUnit]) -> bool; -} - -/// A marker type that signals a parser should parse the source as UTF-16 bytes. -#[derive(Debug, PartialEq, Clone)] -#[allow(clippy::exhaustive_structs)] // ZST Marker trait, no fields should be added -pub struct Utf16; - -impl private::Sealed for Utf16 {} - -impl EncodingType for Utf16 { - type CodeUnit = u16; - fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> { - source.get(start..end) - } - - fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult> { - source.get(index).copied().map(to_ascii_byte).transpose() - } - - fn check_calendar_key(key: &[Self::CodeUnit]) -> bool { - key == [0x75, 0x2d, 0x63, 0x61] - } -} - -#[inline] -fn to_ascii_byte(b: u16) -> ParserResult { - if !(0x01..0x7F).contains(&b) { - return Err(ParseError::NonAsciiCodePoint); - } - Ok(b as u8) -} - -/// A marker type that signals a parser should parse the source as UTF-8 bytes. -#[derive(Debug, PartialEq, Clone)] -#[allow(clippy::exhaustive_structs)] // ZST Marker trait, no fields should be added. -pub struct Utf8; - -impl private::Sealed for Utf8 {} - -impl EncodingType for Utf8 { - type CodeUnit = u8; - - fn slice<'a>(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> { - source.get(start..end) - } - - fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult> { - Ok(source.get(index).copied()) - } - - fn check_calendar_key(key: &[Self::CodeUnit]) -> bool { - key == "u-ca".as_bytes() - } -} - // ==== Mini cursor implementation for Iso8601 targets ==== /// `Cursor` is a small cursor implementation for parsing Iso8601 grammar. diff --git a/utils/ixdtf/src/encoding.rs b/utils/ixdtf/src/encoding.rs new file mode 100644 index 00000000000..24e7a7c3144 --- /dev/null +++ b/utils/ixdtf/src/encoding.rs @@ -0,0 +1,85 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! This module contains the supported encoding for `ixdtf` parsing. + +use crate::{ParseError, ParserResult}; + +mod private { + pub trait Sealed {} +} + +/// A trait for defining various supported encodings +/// and implementing functionality that is encoding +/// sensitive / specific. +pub trait EncodingType: private::Sealed { + /// The code unit for the current encoding. + type CodeUnit: PartialEq + core::fmt::Debug + Clone; + + /// Get a slice from the underlying source using for start..end + #[doc(hidden)] + fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]>; + + /// Retrieve the provided code unit index and returns the value as an ASCII byte + /// or None if the value is not ASCII representable. + #[doc(hidden)] + fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult>; + + /// Checks for the known calendar annotation key `u-ca`. + #[doc(hidden)] + fn check_calendar_key(key: &[Self::CodeUnit]) -> bool; +} + +/// A marker type that signals a parser should parse the source as UTF-16 bytes. +#[derive(Debug, PartialEq, Clone)] +#[allow(clippy::exhaustive_structs)] // ZST Marker trait, no fields should be added +pub struct Utf16; + +impl private::Sealed for Utf16 {} + +impl EncodingType for Utf16 { + type CodeUnit = u16; + fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> { + source.get(start..end) + } + + fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult> { + source.get(index).copied().map(to_ascii_byte).transpose() + } + + fn check_calendar_key(key: &[Self::CodeUnit]) -> bool { + key == [0x75, 0x2d, 0x63, 0x61] + } +} + +#[inline] +fn to_ascii_byte(b: u16) -> ParserResult { + if !(0x01..0x7F).contains(&b) { + return Err(ParseError::NonAsciiCodePoint); + } + Ok(b as u8) +} + +/// A marker type that signals a parser should parse the source as UTF-8 bytes. +#[derive(Debug, PartialEq, Clone)] +#[allow(clippy::exhaustive_structs)] // ZST Marker trait, no fields should be added. +pub struct Utf8; + +impl private::Sealed for Utf8 {} + +impl EncodingType for Utf8 { + type CodeUnit = u8; + + fn slice<'a>(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> { + source.get(start..end) + } + + fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult> { + Ok(source.get(index).copied()) + } + + fn check_calendar_key(key: &[Self::CodeUnit]) -> bool { + key == "u-ca".as_bytes() + } +} diff --git a/utils/ixdtf/src/lib.rs b/utils/ixdtf/src/lib.rs index e12df2f3550..f24d92b7e53 100644 --- a/utils/ixdtf/src/lib.rs +++ b/utils/ixdtf/src/lib.rs @@ -388,17 +388,15 @@ ) )] -pub(crate) mod core; mod error; + +pub(crate) mod core; + +pub mod encoding; pub mod parsers; pub mod records; pub use error::ParseError; -/// This module contains the supported encoding for `ixdtf` parsing. -pub mod encoding { - pub use crate::core::{Utf16, Utf8}; -} - /// The `ixdtf` crate's Result type. pub type ParserResult = Result; diff --git a/utils/ixdtf/src/parsers/annotations.rs b/utils/ixdtf/src/parsers/annotations.rs index dc4610e18a8..054d2db0c3e 100644 --- a/utils/ixdtf/src/parsers/annotations.rs +++ b/utils/ixdtf/src/parsers/annotations.rs @@ -6,7 +6,7 @@ use crate::{ assert_syntax, - core::EncodingType, + encoding::EncodingType, parsers::{ grammar::{ is_a_key_char, is_a_key_leading_char, is_annotation_close, diff --git a/utils/ixdtf/src/parsers/datetime.rs b/utils/ixdtf/src/parsers/datetime.rs index 420044f261e..f37a0d339df 100644 --- a/utils/ixdtf/src/parsers/datetime.rs +++ b/utils/ixdtf/src/parsers/datetime.rs @@ -6,7 +6,7 @@ use crate::{ assert_syntax, - core::EncodingType, + encoding::EncodingType, parsers::{ annotations, grammar::{is_annotation_open, is_date_time_separator, is_hyphen, is_utc_designator}, diff --git a/utils/ixdtf/src/parsers/duration.rs b/utils/ixdtf/src/parsers/duration.rs index c3d5e845f06..7b6c8bbac6b 100644 --- a/utils/ixdtf/src/parsers/duration.rs +++ b/utils/ixdtf/src/parsers/duration.rs @@ -6,7 +6,7 @@ use crate::{ assert_syntax, - core::EncodingType, + encoding::EncodingType, parsers::{ grammar::{ is_ascii_sign, is_day_designator, is_duration_designator, is_hour_designator, diff --git a/utils/ixdtf/src/parsers/mod.rs b/utils/ixdtf/src/parsers/mod.rs index 662d4940566..6d31a6133e0 100644 --- a/utils/ixdtf/src/parsers/mod.rs +++ b/utils/ixdtf/src/parsers/mod.rs @@ -4,8 +4,9 @@ //! The parser module contains the implementation details for `IxdtfParser` and `IsoDurationParser` -use crate::core::{EncodingType, Utf16, Utf8}; -use crate::{core::Cursor, ParserResult}; +use crate::core::Cursor; +use crate::encoding::{EncodingType, Utf16, Utf8}; +use crate::ParserResult; #[cfg(feature = "duration")] use crate::records::DurationParseRecord; diff --git a/utils/ixdtf/src/parsers/tests.rs b/utils/ixdtf/src/parsers/tests.rs index a4edc6e583b..aca518077c0 100644 --- a/utils/ixdtf/src/parsers/tests.rs +++ b/utils/ixdtf/src/parsers/tests.rs @@ -9,7 +9,7 @@ use alloc::string::String; use alloc::vec::Vec; use crate::{ - core::Utf16, + encoding::Utf16, parsers::IxdtfParser, records::{ Annotation, DateRecord, Fraction, IxdtfParseRecord, TimeRecord, TimeZoneAnnotation, diff --git a/utils/ixdtf/src/parsers/time.rs b/utils/ixdtf/src/parsers/time.rs index f67ecd5ae88..5ef65d6bb8b 100644 --- a/utils/ixdtf/src/parsers/time.rs +++ b/utils/ixdtf/src/parsers/time.rs @@ -8,7 +8,7 @@ use core::num::NonZeroU8; use crate::{ assert_syntax, - core::EncodingType, + encoding::EncodingType, parsers::{ datetime::{parse_month_day, parse_year_month}, grammar::{ diff --git a/utils/ixdtf/src/parsers/timezone.rs b/utils/ixdtf/src/parsers/timezone.rs index 9c4741f1404..1b3bfd843b3 100644 --- a/utils/ixdtf/src/parsers/timezone.rs +++ b/utils/ixdtf/src/parsers/timezone.rs @@ -15,7 +15,7 @@ use super::{ }; use crate::{ assert_syntax, - core::EncodingType, + encoding::EncodingType, records::{ FullPrecisionOffset, MinutePrecisionOffset, Sign, TimeZoneAnnotation, TimeZoneRecord, UtcOffsetRecord, UtcOffsetRecordOrZ, diff --git a/utils/ixdtf/src/records.rs b/utils/ixdtf/src/records.rs index 1ff1e796a11..cda4bc25d56 100644 --- a/utils/ixdtf/src/records.rs +++ b/utils/ixdtf/src/records.rs @@ -6,7 +6,7 @@ use core::num::NonZeroU8; -use crate::core::EncodingType; +use crate::encoding::EncodingType; /// An `IxdtfParseRecord` is an intermediary record returned by `IxdtfParser`. #[non_exhaustive]