Skip to content

Commit 0c0d41f

Browse files
fuse classify
1 parent d8c6c70 commit 0c0d41f

1 file changed

Lines changed: 29 additions & 22 deletions

File tree

src/decoder.rs

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use arrow_array::builder::{
77
use arrow_array::{ArrayRef, RecordBatch};
88
use arrow_schema::{ArrowError, DataType, SchemaRef};
99

10-
use crate::classify::{COMMA, NEWLINE, QUOTES, classify};
10+
use crate::classify::{COMMA, HIGH_NIBBLES, LOW_NIBBLES, NEWLINE, QUOTES};
1111
use crate::u8x16;
1212

1313
#[derive(Debug)]
@@ -69,24 +69,28 @@ impl Decoder {
6969
let mut padded = buf.to_vec();
7070
padded.resize(padded.len().next_multiple_of(64), 0);
7171

72-
// phase 1: classify
73-
let vectors = classify(&padded);
74-
75-
// phase 2: bitsets
72+
// phase 1+2: classify and build bitsets in one pass
73+
let low_nibbles = u8x16::from_slice_unchecked(&LOW_NIBBLES);
74+
let high_nibbles = u8x16::from_slice_unchecked(&HIGH_NIBBLES);
7675
let comma_bc = u8x16::broadcast(COMMA);
7776
let newline_bc = u8x16::broadcast(NEWLINE);
7877
let quote_bc = u8x16::broadcast(QUOTES);
7978

80-
let cap = vectors.len() / 4;
79+
let cap = padded.len() / 64;
8180
let mut comma_bitsets = Vec::with_capacity(cap);
8281
let mut newline_bitsets = Vec::with_capacity(cap);
8382
let mut quote_bitsets = Vec::with_capacity(cap);
8483

85-
vectors.chunks_exact(4).for_each(|chunk| {
86-
comma_bitsets.push(build_u64(chunk, comma_bc));
87-
newline_bitsets.push(build_u64(chunk, newline_bc));
88-
quote_bitsets.push(build_u64(chunk, quote_bc));
89-
});
84+
for chunk in padded.chunks_exact(64) {
85+
let v0 = classify_one(&chunk[0..16], high_nibbles, low_nibbles);
86+
let v1 = classify_one(&chunk[16..32], high_nibbles, low_nibbles);
87+
let v2 = classify_one(&chunk[32..48], high_nibbles, low_nibbles);
88+
let v3 = classify_one(&chunk[48..64], high_nibbles, low_nibbles);
89+
90+
comma_bitsets.push(build_u64_from_classified(v0, v1, v2, v3, comma_bc));
91+
newline_bitsets.push(build_u64_from_classified(v0, v1, v2, v3, newline_bc));
92+
quote_bitsets.push(build_u64_from_classified(v0, v1, v2, v3, quote_bc));
93+
}
9094

9195
// phase 3: quote mask
9296
let mut carry = false;
@@ -133,9 +137,7 @@ impl Decoder {
133137
if !self.header_skipped {
134138
if let Some(&nl) = self.cached_newline_pos.first() {
135139
let pos = nl as usize;
136-
if self.cached_buf[pos] == b'\r'
137-
&& self.cached_buf.get(pos + 1) == Some(&b'\n')
138-
{
140+
if self.cached_buf[pos] == b'\r' && self.cached_buf.get(pos + 1) == Some(&b'\n') {
139141
self.cached_start = pos + 2;
140142
if self
141143
.cached_newline_pos
@@ -343,8 +345,6 @@ impl Decoder {
343345
}
344346
self.field_offsets.push(self.field_data.len());
345347
}
346-
347-
348348
}
349349

350350
macro_rules! build_primitive {
@@ -372,12 +372,19 @@ macro_rules! build_primitive {
372372
}
373373
use build_primitive;
374374

375-
#[inline]
376-
fn build_u64(chunks: &[u8x16], broadcast: u8x16) -> u64 {
377-
let a = chunks[0].eq(broadcast).bitset() as u64;
378-
let b = chunks[1].eq(broadcast).bitset() as u64;
379-
let c = chunks[2].eq(broadcast).bitset() as u64;
380-
let d = chunks[3].eq(broadcast).bitset() as u64;
/// Classifies one 16-byte slice of input into a `u8x16` of per-byte class values.
///
/// Loads `chunk` into a `u8x16`, splits it into its high- and low-nibble
/// vectors, runs `classify` on each against the matching table vector, and
/// returns the bitwise AND of the two results.
///
/// * `chunk` - bytes to classify; the call sites visible in this diff always
///   pass 16-byte sub-slices of a 64-byte chunk.
/// * `high_nibbles` / `low_nibbles` - table vectors, built by the caller from
///   `HIGH_NIBBLES` / `LOW_NIBBLES`.
///
/// NOTE(review): `from_slice_unchecked` presumably skips the length check, so
/// `chunk` must hold at least 16 bytes — confirm against the `u8x16` API.
375+
#[inline(always)]
376+
fn classify_one(chunk: &[u8], high_nibbles: u8x16, low_nibbles: u8x16) -> u8x16 {
377+
let v = u8x16::from_slice_unchecked(chunk);
378+
let (high, low) = v.nibbles();
// A byte's class is whatever survives in both the high-nibble and low-nibble results.
379+
high_nibbles.classify(high) & low_nibbles.classify(low)
380+
}
381+
/// Builds one `u64` bitset from four already-classified `u8x16` vectors.
///
/// Each of `v0`..`v3` is compared for equality against `broadcast`, the
/// comparison result is reduced with `bitset()`, and the four values are
/// packed into a single `u64`: `v0` at bit 0, `v1` at bit 16, `v2` at bit 32,
/// `v3` at bit 48.
///
/// NOTE(review): the packing assumes `bitset()` yields at most 16 significant
/// bits (presumably one per lane) — confirm against the `u8x16` API.
382+
#[inline(always)]
383+
fn build_u64_from_classified(v0: u8x16, v1: u8x16, v2: u8x16, v3: u8x16, broadcast: u8x16) -> u64 {
384+
let a = v0.eq(broadcast).bitset() as u64;
385+
let b = v1.eq(broadcast).bitset() as u64;
386+
let c = v2.eq(broadcast).bitset() as u64;
387+
let d = v3.eq(broadcast).bitset() as u64;
// Place each vector's mask at its own 16-bit offset, in input order.
381388
a | (b << 16) | (c << 32) | (d << 48)
382389
}
383390

0 commit comments

Comments
 (0)