Skip to content

Commit 9a766a4

Browse files
committed
introduce LineReader
1 parent 652f1e3 commit 9a766a4

File tree

8 files changed

+724
-1
lines changed

8 files changed

+724
-1
lines changed

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,8 @@
11
# go-commons
2-
A kitchen sync of data structures and algorithms in Go
2+
A kitchen sink of data structures and algorithms in Go
3+
4+
# Contents
5+
6+
## io
7+
### LineReader
8+
Read lines from a reader while truncating lines that exceed the destination buffer's size.

go.mod

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
module github.com/asymmetric-research/go-commons
2+
3+
go 1.23.0
4+
5+
require (
6+
github.com/mitchellh/go-linereader v0.0.0-20190213213312-1b945b3263eb
7+
github.com/stretchr/testify v1.9.0
8+
)
9+
10+
require (
11+
github.com/davecgh/go-spew v1.1.1 // indirect
12+
github.com/go-cmd/cmd v1.4.3
13+
github.com/pmezard/go-difflib v1.0.0 // indirect
14+
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e
15+
gopkg.in/yaml.v3 v3.0.1 // indirect
16+
)

go.sum

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3+
github.com/go-cmd/cmd v1.4.3 h1:6y3G+3UqPerXvPcXvj+5QNPHT02BUw7p6PsqRxLNA7Y=
4+
github.com/go-cmd/cmd v1.4.3/go.mod h1:u3hxg/ry+D5kwh8WvUkHLAMe2zQCaXd00t35WfQaOFk=
5+
github.com/mitchellh/go-linereader v0.0.0-20190213213312-1b945b3263eb h1:GRiLv4rgyqjqzxbhJke65IYUf4NCOOvrPOJbV/sPxkM=
6+
github.com/mitchellh/go-linereader v0.0.0-20190213213312-1b945b3263eb/go.mod h1:OaY7UOoTkkrX3wRwjpYRKafIkkyeD0UtweSHAWWiqQM=
7+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
8+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
9+
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
10+
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
11+
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e h1:I88y4caeGeuDQxgdoFPUq097j7kNfw6uvuiNxUBfcBk=
12+
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ=
13+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
14+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
15+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
16+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

io.test

5.54 MB
Binary file not shown.

io/README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# io
2+
3+
## LineReader
4+
### Usage
5+
```go
6+
lr := NewLineReader(reader, 4096 /* blockSize */)
7+
line := [12288]byte{}
8+
9+
var err error
10+
for err == nil {
11+
n, ntrunc, err := lr.Read(line[:])
12+
lastline := line[:n]
13+
fmt.Printf("%d bytes didn't fit\n", ntrunc)
14+
}
15+
```
16+
17+
### Benchmarks
18+
```
19+
go test -benchmem -benchtime=5s -bench=. ./io/...
20+
goos: linux
21+
goarch: amd64
22+
pkg: github.com/asymmetric-research/go-commons/io
23+
cpu: AMD Ryzen 9 5950X 16-Core Processor
24+
BenchmarkLineReaderUnbuffered-32 1237466 4726 ns/op 22560 B/op 5 allocs/op
25+
BenchmarkHashicorpsUnbuffered-32 4712 1345807 ns/op 2295415 B/op 29602 allocs/op
26+
BenchmarkGoCmdUnbuffered-32 236834 24471 ns/op 41636 B/op 289 allocs/op
27+
BenchmarkLineReader-32 2206489 2722 ns/op 12328 B/op 4 allocs/op
28+
BenchmarkHashicorps-32 4239 1416668 ns/op 2285070 B/op 29601 allocs/op
29+
BenchmarkGoCmd-32 272906 21842 ns/op 31563 B/op 292 allocs/op
30+
PASS
31+
ok github.com/asymmetric-research/go-commons/io 44.384s
32+
```

io/line_reader.go

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
package io
2+
3+
import (
4+
"bytes"
5+
"io"
6+
7+
armath "github.com/asymmetric-research/go-commons/math"
8+
)
9+
10+
type LineReader struct {
11+
reader io.Reader
12+
readbufbase []byte
13+
readbuf []byte
14+
blocksize uint
15+
}
16+
17+
func NewLineReader(reader io.Reader, blockSize uint) *LineReader {
18+
lr := &LineReader{}
19+
NewlineReaderInto(lr, reader, blockSize)
20+
return lr
21+
}
22+
23+
func NewlineReaderInto(dst *LineReader, reader io.Reader, blockSize uint) {
24+
*dst = LineReader{
25+
reader: reader,
26+
readbufbase: make([]byte, blockSize),
27+
blocksize: blockSize,
28+
}
29+
}
30+
31+
// Read reads as much as possible into dst, until the next newline or EOF is reached.
32+
// Every call to Read starts on a new line; the remainder of the previous line is discarded.
33+
func (lr *LineReader) Read(dst []byte) (nread int, ndiscarted int, err error) {
34+
// copy as much of read buffer as possible to dst
35+
if len(lr.readbuf) > 0 {
36+
// fast path: can we get a new line from the read buffer?
37+
maxread := armath.Min(len(dst), len(lr.readbuf))
38+
eolidx := bytes.IndexByte(lr.readbuf[:maxread], '\n')
39+
if eolidx >= 0 && eolidx < len(dst) {
40+
// yes - copy to dst and return
41+
copy(dst[:eolidx], lr.readbuf)
42+
lr.readbuf = lr.readbuf[eolidx+1:]
43+
return eolidx, 0, nil
44+
}
45+
46+
// no - copy as much of the read buffer as possible to dst, and then continue reading from reader
47+
n := copy(dst, lr.readbuf)
48+
nread += n
49+
lr.readbuf = lr.readbuf[n:]
50+
dst = dst[n:]
51+
52+
}
53+
54+
for i := uint(0); ; i++ {
55+
readOffset := lr.blocksize * i
56+
readLimit := armath.Min(readOffset+lr.blocksize, uint(len(dst)))
57+
58+
// dst has been filled and there hasn't been a new line yet
59+
if readLimit <= readOffset {
60+
ndiscarted = lr.discardRestOfLine()
61+
return
62+
}
63+
64+
dstClamp := dst[readOffset:readLimit]
65+
var n int
66+
n, err = lr.reader.Read(dstClamp)
67+
dstClamp = dstClamp[:n]
68+
nread += n
69+
70+
if err == io.EOF && n == 0 {
71+
return
72+
} else if err != nil {
73+
return
74+
}
75+
76+
// is there an end of line in this block?
77+
eolidx := bytes.IndexByte(dstClamp, '\n')
78+
79+
if eolidx < 0 {
80+
continue
81+
}
82+
83+
// discard the new line character
84+
nread -= 1
85+
86+
// is new line at the end of read?
87+
if eolidx == int(readLimit)-1 {
88+
// yes
89+
return
90+
91+
}
92+
93+
// copy the data after the end of line into the read buffer
94+
cpyn := copy(lr.readbufbase, dstClamp[eolidx+1:])
95+
lr.readbuf = lr.readbufbase[:cpyn]
96+
nread -= n - eolidx - 1
97+
return
98+
}
99+
}
100+
101+
func (lr *LineReader) discardRestOfLine() int {
102+
// discard the rest of the line in the read buffer
103+
104+
if len(lr.readbuf) > 0 {
105+
if idx := bytes.IndexByte(lr.readbuf, '\n'); idx >= 0 {
106+
lr.readbuf = lr.readbuf[idx+1:]
107+
return idx
108+
} else {
109+
lr.readbuf = nil
110+
}
111+
}
112+
113+
// discard the rest of the line in the reader
114+
115+
prevread := 0
116+
for {
117+
n, err := lr.reader.Read(lr.readbufbase)
118+
lr.readbuf = lr.readbufbase[:n]
119+
if err != nil {
120+
return n
121+
}
122+
123+
eolidx := bytes.IndexByte(lr.readbuf, '\n')
124+
125+
if eolidx >= 0 {
126+
lr.readbuf = lr.readbuf[eolidx+1:]
127+
return eolidx + prevread
128+
}
129+
prevread += n
130+
}
131+
}

0 commit comments

Comments
 (0)