Skip to content
Merged

Perf #115

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name = "shvproto"
description = "Rust implementation of the SHV protocol"
license = "MIT"
repository = "https://github.com/silicon-heaven/libshvproto-rs"
version = "6.1.9"
version = "6.1.10"
edition = "2024"

[dependencies]
Expand Down
29 changes: 11 additions & 18 deletions src/cpon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -505,17 +505,14 @@ impl<'a, R> CponReader<'a, R>
fn read_datetime(&mut self) -> Result<Value, ReadError> {
self.get_byte()?; // eat 'd'
let v = self.read_string()?;
if let Value::String(sdata) = v {
match DateTime::from_iso_str(&sdata) {
Ok(dt) => {
return Ok(Value::from(dt));
}
Err(err) => {
return Err(self.make_error(&err, ReadErrorReason::InvalidCharacter))
}
match DateTime::from_iso_str(&v) {
Ok(dt) => {
Ok(Value::from(dt))
}
Err(err) => {
Err(self.make_error(&err, ReadErrorReason::InvalidCharacter))
}
}
Err(self.make_error("Invalid DateTime", ReadErrorReason::InvalidCharacter))
}
fn read_true(&mut self) -> Result<Value, ReadError> {
self.read_text_token("true")?;
Expand Down Expand Up @@ -545,7 +542,7 @@ where R: Read
/// Builds a `ReadError` for this reader: `msg` is prefixed with
/// `"Cpon read error - "` and handed, together with `reason`, to the
/// underlying byte reader's own `make_error`.
fn make_error(&self, msg: &str, reason: ReadErrorReason) -> ReadError {
    let prefixed = format!("Cpon read error - {msg}");
    self.byte_reader.make_error(&prefixed, reason)
}
fn read_string(&mut self) -> Result<Value, ReadError> {
fn read_string(&mut self) -> Result<String, ReadError> {
let mut buff: Vec<u8> = Vec::new();
self.get_byte()?; // eat "
loop {
Expand All @@ -572,9 +569,9 @@ where R: Read
}
}
}
let s = std::str::from_utf8(&buff);
let s = String::from_utf8(buff);
match s {
Ok(s) => Ok(Value::from(s)),
Ok(s) => Ok(s),
Err(e) => Err(self.make_error(&format!("Invalid String, Utf8 error: {e}"), ReadErrorReason::InvalidCharacter)),
}
}
Expand Down Expand Up @@ -615,7 +612,7 @@ impl<R> Reader for CponReader<'_, R>
let b = self.peek_byte()?;
let v = match &b {
b'0' ..= b'9' | b'+' | b'-' => self.read_number(),
b'"' => self.read_string(),
b'"' => self.read_string().map(Value::from),
b'b' => self.read_blob_esc(),
b'x' => self.read_blob_hex(),
b'[' => self.read_list(),
Expand Down Expand Up @@ -681,11 +678,7 @@ impl<R> Reader for CponReader<'_, R>
if b == b'"' {
// Regular map keys are strings
let key = self.read_string()?;
if let Value::String(s) = key {
Ok(MapKey::String((*s).clone()))
} else {
Err(self.make_error("Expected string key in map", ReadErrorReason::InvalidCharacter))
}
Ok(MapKey::String(key))
} else if b.is_ascii_digit() || b == b'+' || b == b'-' {
// IMap keys are integers
let ReadInt{ value, is_negative, .. } = self.read_int(0, false)?;
Expand Down
211 changes: 164 additions & 47 deletions src/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use std::cmp::Ordering;
use std::fmt;
use chrono::{FixedOffset, NaiveDateTime, Offset};
use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone};

/// msec: 57, tz: 7;
/// tz is stored as signed count of quarters of hour (15 min)
Expand All @@ -28,10 +28,6 @@ impl Default for ToISOStringOptions {
/// Date-time value packed into a single `i64`.
///
/// `epoc_msec_utc_offset` unpacks it again: the millisecond part is
/// `self.0 / (TZ_MASK + 1)` and the offset is computed starting from the low
/// bits `self.0 & TZ_MASK`.
#[derive(Debug, Clone, PartialEq, Copy)]
pub struct DateTime(i64);
impl DateTime {
//pub fn invalid() -> DateTime {
// DateTime::from_epoch_msec(0)
//}
//pub fn is_valid(&self) -> bool { }

#[cfg(feature = "serde")]
pub(crate) fn from_inner(inner: i64) -> Self {
Expand Down Expand Up @@ -71,53 +67,123 @@ impl DateTime {
Self::from_epoch_msec_tz(epoch_msec, 0)
}
pub fn from_iso_str(iso_str: &str) -> Result<DateTime, String> {
const PATTERN: &str = "2020-02-03T11:59:43";
if iso_str.len() >= PATTERN.len() {
let s = iso_str;
let naive_str = &s[..PATTERN.len()];
if let Ok(ndt) = chrono::NaiveDateTime::parse_from_str(naive_str, "%Y-%m-%dT%H:%M:%S") {
let mut msec = 0;
let mut offset = 0;
let mut rest = &s[PATTERN.len()..];
if matches!(rest.as_bytes().first(), Some(b'.')) {
rest = &rest[1..];
if rest.len() >= 3 {
match rest[..3].parse::<i32>() {
Ok(ms) => {
msec = ms;
rest = &rest[3..];
}
Err(err) => {
return Err(format!("Parsing DateTime msec part error: {err}, in '{iso_str}"))
}
}
}
const BASE_LEN: usize = 19;
let b = iso_str.as_bytes();
let invalid_datetime = || format!("Invalid DateTime: '{iso_str}'");

// Check that we have at least enough characters for a timestamp without milliseconds or a timezone.
let Some((base, mut rest)) = b.split_at_checked(BASE_LEN) else {
return Err(invalid_datetime());
};

/// Parses an unsigned decimal number consisting solely of ASCII digits.
///
/// Returns `None` if `slice` contains any non-digit byte (signs and
/// whitespace included) or if the value does not fit into a `u32`.
/// An empty slice yields `Some(0)`.
fn parse_u32(slice: &[u8]) -> Option<u32> {
    slice.iter().try_fold(0u32, |value, &byte| {
        if !byte.is_ascii_digit() {
            return None;
        }
        // Checked arithmetic: an over-long digit run must be reported as a
        // parse failure instead of panicking (debug) or wrapping (release).
        value.checked_mul(10)?.checked_add(u32::from(byte - b'0'))
    })
}

/// Consumes the first `len` bytes of `*slice` and parses them with `parse_u32`.
///
/// Returns `None` without touching `*slice` when fewer than `len` bytes are
/// available. Otherwise `*slice` is advanced past the consumed bytes before
/// parsing, so it stays advanced even if the parse itself yields `None`.
fn take_u32(slice: &mut &[u8], len: usize) -> Option<u32> {
    let current = *slice;
    if len > current.len() {
        return None;
    }
    let (digits, remainder) = current.split_at(len);
    *slice = remainder;
    parse_u32(digits)
}

let &[
y0, y1, y2, y3, b'-',
mo0, mo1, b'-',
d0, d1, b'T',
h0, h1, b':',
mi0, mi1, b':',
s0, s1,
] = base else {
return Err(invalid_datetime());
};

let year = parse_u32(&[y0, y1, y2, y3]).ok_or_else(invalid_datetime)?;
let month = parse_u32(&[mo0, mo1]).ok_or_else(invalid_datetime)?;
let day = parse_u32(&[d0, d1]).ok_or_else(invalid_datetime)?;

let naive_date = NaiveDate::from_ymd_opt(year.cast_signed(), month, day).ok_or_else(invalid_datetime)?;

let hour = parse_u32(&[h0, h1]).ok_or_else(invalid_datetime)?;
let minute = parse_u32(&[mi0, mi1]).ok_or_else(invalid_datetime)?;
let second = parse_u32(&[s0, s1]).ok_or_else(invalid_datetime)?;

let invalid_datetime_msec = || format!("Parsing DateTime msec part error, in '{iso_str}'");

let msec = if let Some((&b'.', tail)) = rest.split_first() {
rest = tail;
let digits = rest.iter().take(3).take_while(|&&c| c.is_ascii_digit()).count();
if digits == 0 {
return Err(invalid_datetime_msec());
}
let val = take_u32(&mut rest, digits).ok_or_else(invalid_datetime_msec)?;

// The fractional part can have more than 3 digits of precision; digits beyond the third are discarded.
let skip = rest.iter().take_while(|b| b.is_ascii_digit()).count();
rest = rest
.get(skip..)
.expect("skip comes from counting rest digits");

match digits {
1 => val * 100,
2 => val * 10,
3 => val,
_ => unreachable!("digits capped at 3 by .take(3)"),
}
} else {
0
};

let naive_time = NaiveTime::from_hms_milli_opt(hour, minute, second, msec).ok_or_else(invalid_datetime)?;

let invalid_datetime_part = |part| format!("Invalid DateTime TZ part: '{part:?}', date time: '{iso_str}'");

let offset_seconds = match rest.split_first() {
Some((b'Z', tail)) => {
if !tail.is_empty() {
return Err(invalid_datetime_part(tail));
}
0
},
Some((&sign @ (b'+' | b'-'), tail)) => {
let sign: i32 = if sign == b'-' { -1 } else { 1 };
match *tail {
[hh_0, hh_1, b':', mm_0, mm_1] | [hh_0, hh_1, mm_0, mm_1] => {
let hh = parse_u32(&[hh_0, hh_1]).ok_or_else(|| invalid_datetime_part(tail))?;
let mm = parse_u32(&[mm_0, mm_1]).ok_or_else(|| invalid_datetime_part(tail))?;
sign * (hh * 3600 + mm * 60).cast_signed()
}
if !rest.is_empty() {
if rest.len() == 1 && *rest.as_bytes().first().expect("len() is 1") == b'Z' {
} else if rest.len() == 3 {
if let Ok(hrs) = rest.parse::<i32>() {
offset = 60 * 60 * hrs;
} else {
return Err(format!("Invalid DateTime TZ(3) part: '{rest}, date time: {iso_str}"))
}
} else if rest.len() == 5 {
if let Ok(hrs) = rest.parse::<i32>() {
offset = 60 * (60 * (hrs / 100) + (hrs % 100));
} else {
return Err(format!("Invalid DateTime TZ(5) part: '{rest}, date time: {iso_str}"))
}
} else {
return Err(format!("Invalid DateTime TZ part: '{rest}, date time: {iso_str}"))
}
[hh_0, hh_1] => {
let hh = parse_u32(&[hh_0, hh_1]).ok_or_else(|| invalid_datetime_part(tail))?;
sign * (hh * 3600).cast_signed()
}
let epoch_msec = (ndt.and_utc().timestamp() - i64::from(offset)) * 1000 + i64::from(msec);
let dt = DateTime::from_epoch_msec_tz(epoch_msec, offset);
return Ok(dt)
_ => return Err(invalid_datetime_part(tail)),
}
},
None => {
0
}
_ => {
return Err(invalid_datetime_part(rest));
}
Err(format!("Invalid DateTime: '{iso_str:?}"))
};

let tz = FixedOffset::east_opt(offset_seconds).ok_or_else(|| format!("Invalid timezone offset seconds: {offset_seconds}"))?;

let naive_datetime = naive_date.and_time(naive_time);
let chrono_dt = tz.from_local_datetime(&naive_datetime).single().ok_or_else(invalid_datetime)?;
let epoch_msec = chrono_dt.timestamp_millis();

let dt = DateTime::from_epoch_msec_tz(epoch_msec, offset_seconds);
Ok(dt)
}

pub fn epoc_msec_utc_offset(self) -> (i64, i32) {
let msec= self.0 / (TZ_MASK + 1);
let mut offset = self.0 & TZ_MASK;
Expand Down Expand Up @@ -234,3 +300,54 @@ impl From<NaiveDateTime> for DateTime {
DateTime::from_naive_datetime(&ndt)
}
}

#[cfg(test)]
mod test {
    use super::DateTime;

    const MINUTE: i32 = 60;
    const HOUR: i32 = 60 * MINUTE;

    /// Checks that every `(input, epoch_msec, utc_offset_sec)` case parses to
    /// `DateTime::from_epoch_msec_tz(epoch_msec, utc_offset_sec)`.
    fn assert_parses_to(cases: &[(&str, i64, i32)]) {
        for &(input, epoch_msec, utc_offset_sec) in cases {
            let expected = DateTime::from_epoch_msec_tz(epoch_msec, utc_offset_sec);
            assert_eq!(DateTime::from_iso_str(input), Ok(expected));
        }
    }

    // No suffix, `Z`, hours only, `hh:mm` and `hhmm` offsets.
    #[test]
    fn from_iso_str_parses_timezone_forms() {
        assert_parses_to(&[
            ("2021-11-08T01:02:03", 1_636_333_323_000, 0),
            ("2021-11-08T01:02:03Z", 1_636_333_323_000, 0),
            ("2021-11-08T01:02:03+05", 1_636_315_323_000, 5 * HOUR),
            ("2021-11-08T01:02:03+05:30", 1_636_313_523_000, 5 * HOUR + 30 * MINUTE),
            ("2021-11-08T01:02:03-0815", 1_636_363_023_000, -8 * HOUR - 15 * MINUTE),
        ]);
    }

    // One to three fractional digits are scaled to milliseconds; a fourth digit is dropped.
    #[test]
    fn from_iso_str_parses_fractional_milliseconds() {
        assert_parses_to(&[
            ("2021-11-08T01:02:03.1Z", 1_636_333_323_100, 0),
            ("2021-11-08T01:02:03.12Z", 1_636_333_323_120, 0),
            ("2021-11-08T01:02:03.123Z", 1_636_333_323_123, 0),
            ("2021-11-08T01:02:03.1234Z", 1_636_333_323_123, 0),
        ]);
    }

    // Malformed fraction, malformed offsets, wrong separators and out-of-range fields.
    #[test]
    fn from_iso_str_rejects_invalid_inputs() {
        let rejected = [
            "2021-11-08T01:02:03.",
            "2021-11-08T01:02:03+",
            "2021-11-08T01:02:03+0",
            "2021-11-08T01:02:03+050",
            "2021-11-08T01:02:03+05:0",
            "2021-11-08T01:02:03+05:00x",
            "2021-11-08T01:02:03X",
            "2021/11/08T01:02:03Z",
            "2021-02-29T01:02:03Z",
            "2021-11-08T24:00:00Z",
        ];
        for input in rejected {
            let parsed = DateTime::from_iso_str(input);
            assert!(parsed.is_err());
        }
    }
}
8 changes: 4 additions & 4 deletions src/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ where R: Read
/// Builds a `ReadError` for this reader: `msg` is prefixed with the reader's
/// format name and handed, together with `reason`, to the underlying byte
/// reader's own `make_error`.
fn make_error(&self, msg: &str, reason: ReadErrorReason) -> ReadError {
    // This is the JSON reader, so the prefix names JSON; labelling these
    // errors as "Cpon" pointed users at the wrong parser.
    self.byte_reader.make_error(&format!("Json read error - {msg}"), reason)
}
fn read_string(&mut self) -> Result<Value, ReadError> {
fn read_string(&mut self) -> Result<String, ReadError> {
let mut buff: Vec<u8> = Vec::new();
self.get_byte()?; // eat "
loop {
Expand Down Expand Up @@ -365,9 +365,9 @@ where R: Read
}
}
}
let s = std::str::from_utf8(&buff);
let s = String::from_utf8(buff);
match s {
Ok(s) => Ok(Value::from(s)),
Ok(s) => Ok(s),
Err(e) => Err(self.make_error(&format!("Invalid String, Utf8 error: {e}"), ReadErrorReason::InvalidCharacter)),
}
}
Expand Down Expand Up @@ -421,7 +421,7 @@ impl<R> Reader for JsonReader<'_, R>
let b = self.peek_byte()?;
let v = match &b {
b'0' ..= b'9' | b'+' | b'-' => self.read_number(),
b'"' => self.read_string(),
b'"' => self.read_string().map(Value::from),
b'[' => self.read_list(),
b'{' => self.read_map(),
b't' => self.read_true(),
Expand Down
16 changes: 4 additions & 12 deletions src/textrdwr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pub trait TextReader : Reader {
}
fn get_byte(&mut self) -> Result<u8, ReadError>;
fn make_error(&self, msg: &str, reason: ReadErrorReason) -> ReadError;
fn read_string(&mut self) -> Result<Value, ReadError>;
fn read_string(&mut self) -> Result<String, ReadError>;

fn skip_white_or_insignificant(&mut self) -> Result<(), ReadError> {
loop {
Expand Down Expand Up @@ -313,20 +313,12 @@ pub trait TextReader : Reader {
break;
}
let key = self.read_string();
let skey = match &key {
Ok(b) => {
match b {
Value::String(s) => {
s
},
_ => return Err(self.make_error("Read MetaMap key internal error", ReadErrorReason::InvalidCharacter)),
}
},
_ => return Err(self.make_error(&format!("Invalid Map key '{b}'"), ReadErrorReason::InvalidCharacter)),
let Ok(skey) = key else {
return Err(self.make_error(&format!("Invalid Map key '{b}'"), ReadErrorReason::InvalidCharacter));
};
self.skip_white_or_insignificant()?;
let val = self.read()?;
map.insert(skey.to_string(), val);
map.insert(skey, val);
}
Ok(Value::from(map))
}
Expand Down
Loading