Skip to content

Commit 8b560e4

Browse files
committed
Swap chrono::NaiveDateTime::parse_from_str for a custom implementation
The initial version was written by AI; I then cleaned it up using pattern matching.
1 parent 59f2ca6 commit 8b560e4

1 file changed

Lines changed: 164 additions & 43 deletions

File tree

src/datetime.rs

Lines changed: 164 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
use std::cmp::Ordering;
44
use std::fmt;
5-
use chrono::{FixedOffset, NaiveDateTime, Offset};
5+
use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone};
66

77
/// msec: 57, tz: 7;
88
/// tz is stored as a signed count of quarter-hours (15 min each)
@@ -67,53 +67,123 @@ impl DateTime {
6767
Self::from_epoch_msec_tz(epoch_msec, 0)
6868
}
6969
pub fn from_iso_str(iso_str: &str) -> Result<DateTime, String> {
70-
const PATTERN: &str = "2020-02-03T11:59:43";
71-
if iso_str.len() >= PATTERN.len() {
72-
let s = iso_str;
73-
let naive_str = &s[..PATTERN.len()];
74-
if let Ok(ndt) = chrono::NaiveDateTime::parse_from_str(naive_str, "%Y-%m-%dT%H:%M:%S") {
75-
let mut msec = 0;
76-
let mut offset = 0;
77-
let mut rest = &s[PATTERN.len()..];
78-
if matches!(rest.as_bytes().first(), Some(b'.')) {
79-
rest = &rest[1..];
80-
if rest.len() >= 3 {
81-
match rest[..3].parse::<i32>() {
82-
Ok(ms) => {
83-
msec = ms;
84-
rest = &rest[3..];
85-
}
86-
Err(err) => {
87-
return Err(format!("Parsing DateTime msec part error: {err}, in '{iso_str}"))
88-
}
89-
}
90-
}
70+
const BASE_LEN: usize = 19;
71+
let b = iso_str.as_bytes();
72+
let invalid_datetime = || format!("Invalid DateTime: '{iso_str}'");
73+
74+
// Check that the input has at least enough characters for a timestamp without milliseconds or a timezone.
75+
let Some((base, mut rest)) = b.split_at_checked(BASE_LEN) else {
76+
return Err(invalid_datetime());
77+
};
78+
79+
fn parse_u32(slice: &[u8]) -> Option<u32> {
80+
let mut value = 0;
81+
for &byte in slice {
82+
if !byte.is_ascii_digit() {
83+
return None;
84+
}
85+
value = value * 10 + u32::from(byte - b'0');
86+
}
87+
Some(value)
88+
}
89+
90+
fn take_u32(slice: &mut &[u8], len: usize) -> Option<u32> {
91+
let (head, tail) = slice.split_at_checked(len)?;
92+
*slice = tail;
93+
parse_u32(head)
94+
}
95+
96+
let &[
97+
y0, y1, y2, y3, b'-',
98+
mo0, mo1, b'-',
99+
d0, d1, b'T',
100+
h0, h1, b':',
101+
mi0, mi1, b':',
102+
s0, s1,
103+
] = base else {
104+
return Err(invalid_datetime());
105+
};
106+
107+
let year = parse_u32(&[y0, y1, y2, y3]).ok_or_else(invalid_datetime)?;
108+
let month = parse_u32(&[mo0, mo1]).ok_or_else(invalid_datetime)?;
109+
let day = parse_u32(&[d0, d1]).ok_or_else(invalid_datetime)?;
110+
111+
let naive_date = NaiveDate::from_ymd_opt(year.cast_signed(), month, day).ok_or_else(invalid_datetime)?;
112+
113+
let hour = parse_u32(&[h0, h1]).ok_or_else(invalid_datetime)?;
114+
let minute = parse_u32(&[mi0, mi1]).ok_or_else(invalid_datetime)?;
115+
let second = parse_u32(&[s0, s1]).ok_or_else(invalid_datetime)?;
116+
117+
let invalid_datetime_msec = || format!("Parsing DateTime msec part error, in '{iso_str}'");
118+
119+
let msec = if let Some((&b'.', tail)) = rest.split_first() {
120+
rest = tail;
121+
let digits = rest.iter().take(3).take_while(|&&c| c.is_ascii_digit()).count();
122+
if digits == 0 {
123+
return Err(invalid_datetime_msec());
124+
}
125+
let val = take_u32(&mut rest, digits).ok_or_else(invalid_datetime_msec)?;
126+
127+
// The fractional part can have more than 3 digits of precision; any extra digits are discarded.
128+
let skip = rest.iter().take_while(|b| b.is_ascii_digit()).count();
129+
rest = rest
130+
.get(skip..)
131+
.expect("skip comes from counting rest digits");
132+
133+
match digits {
134+
1 => val * 100,
135+
2 => val * 10,
136+
3 => val,
137+
_ => unreachable!("digits capped at 3 by .take(3)"),
138+
}
139+
} else {
140+
0
141+
};
142+
143+
let naive_time = NaiveTime::from_hms_milli_opt(hour, minute, second, msec).ok_or_else(invalid_datetime)?;
144+
145+
let invalid_datetime_part = |part| format!("Invalid DateTime TZ part: '{part:?}', date time: '{iso_str}'");
146+
147+
let offset_seconds = match rest.split_first() {
148+
Some((b'Z', tail)) => {
149+
if !tail.is_empty() {
150+
return Err(invalid_datetime_part(tail));
151+
}
152+
0
153+
},
154+
Some((&sign @ (b'+' | b'-'), tail)) => {
155+
let sign: i32 = if sign == b'-' { -1 } else { 1 };
156+
match *tail {
157+
[hh_0, hh_1, b':', mm_0, mm_1] | [hh_0, hh_1, mm_0, mm_1] => {
158+
let hh = parse_u32(&[hh_0, hh_1]).ok_or_else(|| invalid_datetime_part(tail))?;
159+
let mm = parse_u32(&[mm_0, mm_1]).ok_or_else(|| invalid_datetime_part(tail))?;
160+
sign * (hh * 3600 + mm * 60).cast_signed()
91161
}
92-
if !rest.is_empty() {
93-
if rest.len() == 1 && *rest.as_bytes().first().expect("len() is 1") == b'Z' {
94-
} else if rest.len() == 3 {
95-
if let Ok(hrs) = rest.parse::<i32>() {
96-
offset = 60 * 60 * hrs;
97-
} else {
98-
return Err(format!("Invalid DateTime TZ(3) part: '{rest}, date time: {iso_str}"))
99-
}
100-
} else if rest.len() == 5 {
101-
if let Ok(hrs) = rest.parse::<i32>() {
102-
offset = 60 * (60 * (hrs / 100) + (hrs % 100));
103-
} else {
104-
return Err(format!("Invalid DateTime TZ(5) part: '{rest}, date time: {iso_str}"))
105-
}
106-
} else {
107-
return Err(format!("Invalid DateTime TZ part: '{rest}, date time: {iso_str}"))
108-
}
162+
[hh_0, hh_1] => {
163+
let hh = parse_u32(&[hh_0, hh_1]).ok_or_else(|| invalid_datetime_part(tail))?;
164+
sign * (hh * 3600).cast_signed()
109165
}
110-
let epoch_msec = (ndt.and_utc().timestamp() - i64::from(offset)) * 1000 + i64::from(msec);
111-
let dt = DateTime::from_epoch_msec_tz(epoch_msec, offset);
112-
return Ok(dt)
166+
_ => return Err(invalid_datetime_part(tail)),
113167
}
168+
},
169+
None => {
170+
0
171+
}
172+
_ => {
173+
return Err(invalid_datetime_part(rest));
114174
}
115-
Err(format!("Invalid DateTime: '{iso_str:?}"))
175+
};
176+
177+
let tz = FixedOffset::east_opt(offset_seconds).ok_or_else(|| format!("Invalid timezone offset seconds: {offset_seconds}"))?;
178+
179+
let naive_datetime = naive_date.and_time(naive_time);
180+
let chrono_dt = tz.from_local_datetime(&naive_datetime).single().ok_or_else(invalid_datetime)?;
181+
let epoch_msec = chrono_dt.timestamp_millis();
182+
183+
let dt = DateTime::from_epoch_msec_tz(epoch_msec, offset_seconds);
184+
Ok(dt)
116185
}
186+
117187
pub fn epoc_msec_utc_offset(self) -> (i64, i32) {
118188
let msec= self.0 / (TZ_MASK + 1);
119189
let mut offset = self.0 & TZ_MASK;
@@ -230,3 +300,54 @@ impl From<NaiveDateTime> for DateTime {
230300
DateTime::from_naive_datetime(&ndt)
231301
}
232302
}
303+
304+
#[cfg(test)]
305+
mod test {
306+
use super::DateTime;
307+
308+
const MINUTE: i32 = 60;
309+
const HOUR: i32 = 60 * MINUTE;
310+
311+
#[test]
312+
fn from_iso_str_parses_timezone_forms() {
313+
for (input, expected) in [
314+
("2021-11-08T01:02:03", DateTime::from_epoch_msec_tz(1_636_333_323_000, 0)),
315+
("2021-11-08T01:02:03Z", DateTime::from_epoch_msec_tz(1_636_333_323_000, 0)),
316+
("2021-11-08T01:02:03+05", DateTime::from_epoch_msec_tz(1_636_315_323_000, 5 * HOUR)),
317+
("2021-11-08T01:02:03+05:30", DateTime::from_epoch_msec_tz(1_636_313_523_000, 5 * HOUR + 30 * MINUTE)),
318+
("2021-11-08T01:02:03-0815", DateTime::from_epoch_msec_tz(1_636_363_023_000, -8 * HOUR - 15 * MINUTE)),
319+
] {
320+
assert_eq!(DateTime::from_iso_str(input), Ok(expected));
321+
}
322+
}
323+
324+
#[test]
325+
fn from_iso_str_parses_fractional_milliseconds() {
326+
for (input, expected) in [
327+
("2021-11-08T01:02:03.1Z", DateTime::from_epoch_msec_tz(1_636_333_323_100, 0)),
328+
("2021-11-08T01:02:03.12Z", DateTime::from_epoch_msec_tz(1_636_333_323_120, 0)),
329+
("2021-11-08T01:02:03.123Z", DateTime::from_epoch_msec_tz(1_636_333_323_123, 0)),
330+
("2021-11-08T01:02:03.1234Z", DateTime::from_epoch_msec_tz(1_636_333_323_123, 0)),
331+
] {
332+
assert_eq!(DateTime::from_iso_str(input), Ok(expected));
333+
}
334+
}
335+
336+
#[test]
337+
fn from_iso_str_rejects_invalid_inputs() {
338+
for input in [
339+
"2021-11-08T01:02:03.",
340+
"2021-11-08T01:02:03+",
341+
"2021-11-08T01:02:03+0",
342+
"2021-11-08T01:02:03+050",
343+
"2021-11-08T01:02:03+05:0",
344+
"2021-11-08T01:02:03+05:00x",
345+
"2021-11-08T01:02:03X",
346+
"2021/11/08T01:02:03Z",
347+
"2021-02-29T01:02:03Z",
348+
"2021-11-08T24:00:00Z",
349+
] {
350+
assert!(DateTime::from_iso_str(input).is_err());
351+
}
352+
}
353+
}

0 commit comments

Comments
 (0)