Skip to content

Commit d8c6c70

Browse files
set up profile
1 parent de215b7 commit d8c6c70

4 files changed

Lines changed: 55 additions & 6 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
/target
22
hits.csv
33
hits_100mb.csv
4+
profile.json.gz

Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ authors = ["Matthew Kim"]
44
version = "0.1.0"
55
edition = "2024"
66
description = "A CSV parser"
7-
license = "MIT"
7+
license = "Apache-2.0"
88

99
[dependencies]
1010
arrow-schema = "55"
@@ -25,6 +25,7 @@ name = "parse"
2525
harness = false
2626

2727
[profile.release]
28-
lto = "fat"
28+
debug = 2
29+
# lto = "fat"
2930
panic = "abort"
3031
codegen-units = 1

profile.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env bash
#
# Build the release binary and record a samply profile of it processing one file.
#
# Usage: ./profile.sh <file>
#
# The binary path below is relative to the current directory, so this script is
# expected to be run from the directory that contains `target/`.

# Abort on any failing command, unset variable, or failing pipeline stage.
set -euo pipefail

# Exactly one argument (the input file) is required.
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <file>" >&2
    exit 1
fi

FILE=$1

# Fail early with a clear message rather than letting the profiled binary fail.
if [ ! -f "$FILE" ]; then
    echo "Error: File '$FILE' not found!" >&2
    exit 1
fi

# Only start recording if the build succeeded.
cargo build --release && samply record ./target/release/arrow-csv2 "$FILE"

src/main.rs

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,41 @@
1-
use arrow_csv2::read;
1+
use std::sync::Arc;
2+
3+
use arrow_csv2::ReaderBuilder;
4+
use arrow_schema::{DataType, Field, Schema};
25

36
fn main() -> Result<(), Box<dyn std::error::Error>> {
47
let path = std::env::args().nth(1).expect("expect .csv file path");
5-
let mut data = std::fs::read(path)?;
6-
let rows = read(&mut data);
8+
let raw = std::fs::read(path)?;
9+
10+
let num_columns = raw
11+
.iter()
12+
.position(|&b| b == b'\n')
13+
.map(|nl| raw[..nl].iter().filter(|&&b| b == b',').count() + 1)
14+
.unwrap_or(1);
15+
16+
let schema = Arc::new(Schema::new(
17+
(0..num_columns)
18+
.map(|i| Field::new(format!("c{i}"), DataType::Utf8, true))
19+
.collect::<Vec<_>>(),
20+
));
21+
22+
let mut decoder = ReaderBuilder::new(schema)
23+
.with_batch_size(8192)
24+
.build_decoder();
725

8-
println!("{:?}", rows);
26+
let mut offset = 0;
27+
loop {
28+
let consumed = decoder.decode(&raw[offset..])?;
29+
offset += consumed;
30+
if consumed == 0 || decoder.capacity() == 0 {
31+
if let Some(batch) = decoder.flush()? {
32+
std::hint::black_box(batch.num_rows());
33+
}
34+
if consumed == 0 && decoder.capacity() > 0 {
35+
break;
36+
}
37+
}
38+
}
939

1040
Ok(())
1141
}

0 commit comments

Comments
 (0)