Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 1 addition & 74 deletions utils/ixdtf/src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,82 +4,9 @@

//! Core functionality for `ixdtf`'s parsers

use crate::encoding::EncodingType;
use crate::{ParseError, ParserResult};

// `Sealed` is a public trait inside a private module: code outside this
// module cannot name it, so traits that list it as a supertrait (see
// `EncodingType`) cannot be implemented elsewhere.
mod private {
pub trait Sealed {}
}

/// A trait for defining the supported encodings and implementing
/// functionality that is encoding sensitive / specific.
///
/// The trait is sealed through its `private::Sealed` supertrait.
pub trait EncodingType: private::Sealed {
/// The code unit type that source slices of this encoding are made of.
type CodeUnit: PartialEq + core::fmt::Debug + Clone;

/// Gets the `start..end` sub-slice of the underlying source.
///
/// The implementations in this file return `None` when that range does
/// not lie within `source`.
fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]>;

/// Retrieves the code unit at `index` and returns its value as a byte.
///
/// In the implementations in this file, `Ok(None)` means `index` is out of
/// bounds. `Utf16` returns an error for code units rejected by
/// `to_ascii_byte`, while `Utf8` returns the byte unchanged.
fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult<Option<u8>>;

/// Checks whether `key` is the known calendar annotation key `u-ca`.
fn check_calendar_key(key: &[Self::CodeUnit]) -> bool;
}

/// A marker type that signals a parser should parse the source as UTF-16
/// code units (`u16`).
#[derive(Debug, PartialEq, Clone)]
#[allow(clippy::exhaustive_structs)] // ZST marker type, no fields should be added
pub struct Utf16;

// Required by the `private::Sealed` supertrait bound on `EncodingType`.
impl private::Sealed for Utf16 {}

impl EncodingType for Utf16 {
type CodeUnit = u16;
fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> {
source.get(start..end)
}

fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult<Option<u8>> {
source.get(index).copied().map(to_ascii_byte).transpose()
}

fn check_calendar_key(key: &[Self::CodeUnit]) -> bool {
key == [0x75, 0x2d, 0x63, 0x61]
}
}

/// Narrows a UTF-16 code unit to a single byte.
///
/// Code units in `0x01..=0x7E` are returned unchanged as a `u8`; every other
/// value is rejected with `ParseError::NonAsciiCodePoint`.
///
/// NOTE(review): U+0000 (NUL) and U+007F (DEL) are ASCII but fall outside the
/// accepted range and are rejected as well — confirm this is intended.
#[inline]
fn to_ascii_byte(b: u16) -> ParserResult<u8> {
    match b {
        // Lossless cast: every accepted value fits in seven bits.
        0x01..=0x7E => Ok(b as u8),
        _ => Err(ParseError::NonAsciiCodePoint),
    }
}

/// A marker type that signals a parser should parse the source as UTF-8
/// bytes (`u8` code units).
#[derive(Debug, PartialEq, Clone)]
#[allow(clippy::exhaustive_structs)] // ZST marker type, no fields should be added.
pub struct Utf8;

// Required by the `private::Sealed` supertrait bound on `EncodingType`.
impl private::Sealed for Utf8 {}

impl EncodingType for Utf8 {
    type CodeUnit = u8;

    /// Borrows the bytes in `start..end`, or yields `None` when that range
    /// does not lie within `source`.
    fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> {
        source.get(start..end)
    }

    /// Returns the byte at `index` unchanged, or `Ok(None)` when `index` is
    /// past the end of `source`.
    ///
    /// This implementation never returns an error: the byte is not checked
    /// against the ASCII range here.
    fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult<Option<u8>> {
        Ok(source.get(index).copied())
    }

    /// Returns `true` only when `key` is exactly the bytes of `u-ca`.
    fn check_calendar_key(key: &[Self::CodeUnit]) -> bool {
        key == b"u-ca"
    }
}

// ==== Mini cursor implementation for Iso8601 targets ====

/// `Cursor` is a small cursor implementation for parsing Iso8601 grammar.
Expand Down
81 changes: 81 additions & 0 deletions utils/ixdtf/src/encoding.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! This module contains the supported encodings for `ixdtf` parsing.

use crate::{ParseError, ParserResult};

// `Sealed` is a public trait inside a private module: code outside this
// module cannot name it, so traits that list it as a supertrait (see
// `EncodingType`) cannot be implemented elsewhere.
mod private {
pub trait Sealed {}
}

/// A trait for defining the supported encodings and implementing
/// functionality that is encoding sensitive / specific.
///
/// The trait is sealed through its `private::Sealed` supertrait.
pub trait EncodingType: private::Sealed {
/// The code unit type that source slices of this encoding are made of.
type CodeUnit: PartialEq + core::fmt::Debug + Clone;

/// Gets the `start..end` sub-slice of the underlying source.
///
/// The implementations in this file return `None` when that range does
/// not lie within `source`.
fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]>;

/// Retrieves the code unit at `index` and returns its value as a byte.
///
/// In the implementations in this file, `Ok(None)` means `index` is out of
/// bounds. `Utf16` returns an error for code units rejected by
/// `to_ascii_byte`, while `Utf8` returns the byte unchanged.
fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult<Option<u8>>;

/// Checks whether `key` is the known calendar annotation key `u-ca`.
fn check_calendar_key(key: &[Self::CodeUnit]) -> bool;
}

/// A marker type that signals a parser should parse the source as UTF-16
/// code units (`u16`).
#[derive(Debug, PartialEq, Clone)]
#[allow(clippy::exhaustive_structs)] // ZST marker type, no fields should be added
pub struct Utf16;

// Required by the `private::Sealed` supertrait bound on `EncodingType`.
impl private::Sealed for Utf16 {}

impl EncodingType for Utf16 {
type CodeUnit = u16;
fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> {
source.get(start..end)
}

fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult<Option<u8>> {
source.get(index).copied().map(to_ascii_byte).transpose()
}

fn check_calendar_key(key: &[Self::CodeUnit]) -> bool {
key == [0x75, 0x2d, 0x63, 0x61]
}
}

/// Narrows a UTF-16 code unit to a single byte.
///
/// Code units in `0x01..=0x7E` are returned unchanged as a `u8`; every other
/// value is rejected with `ParseError::NonAsciiCodePoint`.
///
/// NOTE(review): U+0000 (NUL) and U+007F (DEL) are ASCII but fall outside the
/// accepted range and are rejected as well — confirm this is intended.
#[inline]
fn to_ascii_byte(b: u16) -> ParserResult<u8> {
    match b {
        // Lossless cast: every accepted value fits in seven bits.
        0x01..=0x7E => Ok(b as u8),
        _ => Err(ParseError::NonAsciiCodePoint),
    }
}

/// A marker type that signals a parser should parse the source as UTF-8
/// bytes (`u8` code units).
#[derive(Debug, PartialEq, Clone)]
#[allow(clippy::exhaustive_structs)] // ZST marker type, no fields should be added.
pub struct Utf8;

// Required by the `private::Sealed` supertrait bound on `EncodingType`.
impl private::Sealed for Utf8 {}

impl EncodingType for Utf8 {
    type CodeUnit = u8;

    /// Borrows the bytes in `start..end`, or yields `None` when that range
    /// does not lie within `source`.
    fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> {
        source.get(start..end)
    }

    /// Returns the byte at `index` unchanged, or `Ok(None)` when `index` is
    /// past the end of `source`.
    ///
    /// This implementation never returns an error: the byte is not checked
    /// against the ASCII range here.
    fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult<Option<u8>> {
        Ok(source.get(index).copied())
    }

    /// Returns `true` only when `key` is exactly the bytes of `u-ca`.
    fn check_calendar_key(key: &[Self::CodeUnit]) -> bool {
        key == b"u-ca"
    }
}
10 changes: 4 additions & 6 deletions utils/ixdtf/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,17 +388,15 @@
)
)]

pub(crate) mod core;
mod error;

pub(crate) mod core;

pub mod encoding;
pub mod parsers;
pub mod records;

pub use error::ParseError;

/// This module contains the supported encodings for `ixdtf` parsing.
pub mod encoding {
pub use crate::core::{Utf16, Utf8};
}

/// The `ixdtf` crate's Result type.
pub type ParserResult<T> = Result<T, ParseError>;
2 changes: 1 addition & 1 deletion utils/ixdtf/src/parsers/annotations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use crate::{
assert_syntax,
core::EncodingType,
encoding::EncodingType,
parsers::{
grammar::{
is_a_key_char, is_a_key_leading_char, is_annotation_close,
Expand Down
2 changes: 1 addition & 1 deletion utils/ixdtf/src/parsers/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use crate::{
assert_syntax,
core::EncodingType,
encoding::EncodingType,
parsers::{
annotations,
grammar::{is_annotation_open, is_date_time_separator, is_hyphen, is_utc_designator},
Expand Down
2 changes: 1 addition & 1 deletion utils/ixdtf/src/parsers/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use crate::{
assert_syntax,
core::EncodingType,
encoding::EncodingType,
parsers::{
grammar::{
is_ascii_sign, is_day_designator, is_duration_designator, is_hour_designator,
Expand Down
5 changes: 3 additions & 2 deletions utils/ixdtf/src/parsers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

//! The parser module contains the implementation details for `IxdtfParser` and `IsoDurationParser`

use crate::core::{EncodingType, Utf16, Utf8};
use crate::{core::Cursor, ParserResult};
use crate::core::Cursor;
use crate::encoding::{EncodingType, Utf16, Utf8};
use crate::ParserResult;

#[cfg(feature = "duration")]
use crate::records::DurationParseRecord;
Expand Down
2 changes: 1 addition & 1 deletion utils/ixdtf/src/parsers/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use alloc::string::String;
use alloc::vec::Vec;

use crate::{
core::Utf16,
encoding::Utf16,
parsers::IxdtfParser,
records::{
Annotation, DateRecord, Fraction, IxdtfParseRecord, TimeRecord, TimeZoneAnnotation,
Expand Down
2 changes: 1 addition & 1 deletion utils/ixdtf/src/parsers/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use core::num::NonZeroU8;

use crate::{
assert_syntax,
core::EncodingType,
encoding::EncodingType,
parsers::{
datetime::{parse_month_day, parse_year_month},
grammar::{
Expand Down
2 changes: 1 addition & 1 deletion utils/ixdtf/src/parsers/timezone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use super::{
};
use crate::{
assert_syntax,
core::EncodingType,
encoding::EncodingType,
records::{
FullPrecisionOffset, MinutePrecisionOffset, Sign, TimeZoneAnnotation, TimeZoneRecord,
UtcOffsetRecord, UtcOffsetRecordOrZ,
Expand Down
2 changes: 1 addition & 1 deletion utils/ixdtf/src/records.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use core::num::NonZeroU8;

use crate::core::EncodingType;
use crate::encoding::EncodingType;

/// An `IxdtfParseRecord` is an intermediary record returned by `IxdtfParser`.
#[non_exhaustive]
Expand Down