Skip to content

Commit acd379d

Browse files
committed
fix: address review comments — safety, endianness, docs
- Remove unused peek2() (replaced by peek_u16)
- peek_u16: use u16::from_le() to ensure correct byte order on all targets
- parse_number_unchecked: change from pub to private fn (only called internally when inplace=true with padded reader)
- parse_key_scalar: update doc to clarify it accepts multi-byte UTF-8, not just ASCII
- swar.rs: add compile_error for big-endian targets
1 parent 606ed8c commit acd379d

3 files changed

Lines changed: 12 additions & 13 deletions

File tree

sonic-number/src/swar.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22
///
33
/// Based on simdjson's `parse_eight_digits_unrolled` technique.
44
/// Uses pure u64 arithmetic to process 8 ASCII digits at a time.
5-
/// Works on all little-endian architectures without actual SIMD instructions.
5+
/// Requires little-endian byte order — the u64 load must place the first
6+
/// string byte in the least-significant byte position.
7+
8+
#[cfg(target_endian = "big")]
9+
compile_error!("SWAR digit parsing requires little-endian byte order");
610

711
/// Check if 8 consecutive bytes are all ASCII digits ('0'-'9').
812
#[inline(always)]

src/parser.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -309,9 +309,9 @@ where
309309
}
310310

311311
/// Unchecked number parsing — requires padded buffer (PaddedSliceRead).
312-
/// Uses the full padded buffer (including padding) so unchecked access is safe.
312+
/// Only called internally when inplace=true (padded reader with ≥64 bytes padding).
313313
#[inline(always)]
314-
pub fn parse_number_unchecked(&mut self, first: u8) -> Result<ParserNumber> {
314+
fn parse_number_unchecked(&mut self, first: u8) -> Result<ParserNumber> {
315315
let reader = &mut self.read;
316316
let neg = first == b'-';
317317
let mut now = reader.index() - (!neg as usize);
@@ -347,8 +347,10 @@ where
347347
}
348348
}
349349

350-
/// Scalar key parsing for object keys — avoids SIMD setup overhead for short keys.
351-
/// Simple while loop, let LLVM optimize the unrolling.
350+
/// Fast path for keys that terminate within 24 bytes and contain no escapes (`\\`)
351+
/// or control characters (`< 0x20`). Accepts any valid UTF-8 bytes (including
352+
/// multi-byte sequences). Falls back to `parse_string_visit` on escape, control
353+
/// byte, or if no closing `"` is found within 24 bytes.
352354
///
353355
/// # Safety
354356
/// Only called when strbuf=None (padded reader path). PaddedSliceRead has 64 bytes

src/reader.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,20 +55,13 @@ pub trait Reader<'de>: Sealed {
5555
self.as_u8_slice()
5656
}
5757

58-
/// Peek next 2 bytes as a fixed-size array reference. No bounds check needed
59-
/// on padded readers (PaddedSliceRead always has ≥64 bytes of padding).
60-
#[inline(always)]
61-
fn peek2(&self) -> &[u8; 2] {
62-
unsafe { &*(self.peek_n(2).unwrap_unchecked().as_ptr() as *const [u8; 2]) }
63-
}
64-
6558
/// Peek next 2 bytes as a little-endian u16 for single-instruction matching.
6659
/// E.g., `peek_u16() == u16::from_le_bytes([b',', b'"'])` matches `,"` in one cmp.
6760
/// Returns 0 if fewer than 2 bytes remain (0 won't match any structural char).
6861
#[inline(always)]
6962
fn peek_u16(&self) -> u16 {
7063
match self.peek_n(2) {
71-
Some(s) => unsafe { (s.as_ptr() as *const u16).read_unaligned() },
64+
Some(s) => u16::from_le(unsafe { (s.as_ptr() as *const u16).read_unaligned() }),
7265
None => 0,
7366
}
7467
}

0 commit comments

Comments
 (0)