File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 11/target
22hits.csv
33hits_100mb.csv
4+ profile.json.gz
Original file line number Diff line number Diff line change @@ -4,7 +4,7 @@ authors = ["Matthew Kim"]
44version = " 0.1.0"
55edition = " 2024"
66description = " A CSV parser"
7- license = " MIT "
7+ license = "Apache-2.0"
88
99[dependencies ]
1010arrow-schema = " 55"
@@ -25,6 +25,7 @@ name = "parse"
2525harness = false
2626
2727[profile .release ]
28- lto = " fat"
28+ debug = 2
29+ # lto = "fat"
2930panic = " abort"
3031codegen-units = 1
#!/usr/bin/env bash
#
# Builds the crate in release mode and, if the build succeeds, runs the
# resulting binary on the given file under `samply record`.
#
# Usage: <this script> <file>
# Exits with status 1 when the argument count is wrong or the file is missing.

set -euo pipefail

# Exactly one argument (the input file) is required.
if [ "$#" -ne 1 ]; then
    # Diagnostics go to stderr so they never mix with regular output.
    echo "Usage: $0 <file>" >&2
    exit 1
fi

FILE=$1

# Fail early, before spending time on a release build, if the input is absent.
if [ ! -f "$FILE" ]; then
    echo "Error: File '$FILE' not found!" >&2
    exit 1
fi

cargo b --release && samply record ./target/release/arrow-csv2 "$FILE"
Original file line number Diff line number Diff line change 1- use arrow_csv2:: read;
1+ use std:: sync:: Arc ;
2+
3+ use arrow_csv2:: ReaderBuilder ;
4+ use arrow_schema:: { DataType , Field , Schema } ;
25
36fn main ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
47 let path = std:: env:: args ( ) . nth ( 1 ) . expect ( "expect .csv file path" ) ;
5- let mut data = std:: fs:: read ( path) ?;
6- let rows = read ( & mut data) ;
8+ let raw = std:: fs:: read ( path) ?;
9+
10+ let num_columns = raw
11+ . iter ( )
12+ . position ( |& b| b == b'\n' )
13+ . map ( |nl| raw[ ..nl] . iter ( ) . filter ( |& & b| b == b',' ) . count ( ) + 1 )
14+ . unwrap_or ( 1 ) ;
15+
16+ let schema = Arc :: new ( Schema :: new (
17+ ( 0 ..num_columns)
18+ . map ( |i| Field :: new ( format ! ( "c{i}" ) , DataType :: Utf8 , true ) )
19+ . collect :: < Vec < _ > > ( ) ,
20+ ) ) ;
21+
22+ let mut decoder = ReaderBuilder :: new ( schema)
23+ . with_batch_size ( 8192 )
24+ . build_decoder ( ) ;
725
8- println ! ( "{:?}" , rows) ;
26+ let mut offset = 0 ;
27+ loop {
28+ let consumed = decoder. decode ( & raw [ offset..] ) ?;
29+ offset += consumed;
30+ if consumed == 0 || decoder. capacity ( ) == 0 {
31+ if let Some ( batch) = decoder. flush ( ) ? {
32+ std:: hint:: black_box ( batch. num_rows ( ) ) ;
33+ }
34+ if consumed == 0 && decoder. capacity ( ) > 0 {
35+ break ;
36+ }
37+ }
38+ }
939
1040 Ok ( ( ) )
1141}
You can’t perform that action at this time.
0 commit comments