Skip to content

Commit 85ced8c

Browse files
committed
experiment sendfile
1 parent a47a1a2 commit 85ced8c

13 files changed

+294
-97
lines changed

cmd/jocko/main.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func run(cmd *cobra.Command, args []string) {
7474
Param: 1,
7575
},
7676
Reporter: &jaegercfg.ReporterConfig{
77-
LogSpans: true,
77+
//LogSpans: true,
7878
},
7979
}
8080

commitlog/reader.go

+56
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@ package commitlog
22

33
import (
44
"io"
5+
"os"
56
"sync"
67

8+
"github.com/travisjeffery/jocko/log"
9+
710
"github.com/pkg/errors"
811
)
912

@@ -14,6 +17,17 @@ type Reader struct {
1417
pos int64
1518
}
1619

20+
func (r *Reader) FileAndOffset() (*os.File, int64, int) {
21+
r.mu.Lock()
22+
defer r.mu.Unlock()
23+
24+
segments := r.cl.Segments()
25+
segment := segments[r.idx]
26+
file := segment.File()
27+
//todo size
28+
size := 0
29+
return file, r.pos, size
30+
}
1731
func (r *Reader) Read(p []byte) (n int, err error) {
1832
r.mu.Lock()
1933
defer r.mu.Unlock()
@@ -56,13 +70,55 @@ func (l *CommitLog) NewReader(offset int64, maxBytes int32) (io.Reader, error) {
5670
if s == nil {
5771
return nil, errors.Wrapf(ErrSegmentNotFound, "segments: %d, offset: %d", len(l.Segments()), offset)
5872
}
73+
//TODO: offset relative?
74+
offset = offset - s.BaseOffset
5975
e, err := s.findEntry(offset)
6076
if err != nil {
6177
return nil, err
6278
}
79+
{
80+
log.Info.Printf("entry: %+v err: %v", e, err)
81+
idx := s.Index
82+
log.Info.Printf("idx: %p idx.position %d mmap: %v \n", idx, idx.position, idx.mmap[0:idx.position])
83+
}
6384
return &Reader{
6485
cl: l,
6586
idx: idx,
6687
pos: e.Position,
6788
}, nil
6889
}
90+
91+
func (l *CommitLog) SendfileParams(offset int64, maxBytes int32) (*os.File, int64, int, error) {
92+
var s *Segment
93+
var idx int
94+
if offset == 0 {
95+
// TODO: seems hackish, should at least check if segments are set.
96+
s, idx = l.Segments()[0], 0
97+
} else {
98+
s, idx = findSegment(l.Segments(), offset)
99+
}
100+
if s == nil {
101+
return nil, 0, 0, errors.Wrapf(ErrSegmentNotFound, "segments: %d, offset: %d", len(l.Segments()), offset)
102+
}
103+
//TODO: offset relative?
104+
offset = offset - s.BaseOffset
105+
e, err := s.findEntry(offset)
106+
if err != nil {
107+
return nil, 0, 0, err
108+
}
109+
{
110+
log.Info.Printf("entry: %+v err: %v", e, err)
111+
idx := s.Index
112+
log.Info.Printf("idx: %p idx.position %d mmap: %v \n", idx, idx.position, idx.mmap[0:idx.position])
113+
}
114+
file := s.File()
115+
_ = idx
116+
//todo : calculate fileOffset and sendSize
117+
fileOffset := int64(0)
118+
sendSize := s.Position
119+
// log.Info.Println("logfile:", file.Name(),
120+
// "fileOffset", fileOffset,
121+
// "sendSize", sendSize)
122+
123+
return file, fileOffset, int(sendSize), nil
124+
}

commitlog/segment.go

+7-2
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,11 @@ func (s *Segment) IsFull() bool {
143143
defer s.Unlock()
144144
return s.Position >= s.maxBytes
145145
}
146+
func (s *Segment) File() *os.File {
147+
s.Lock()
148+
defer s.Unlock()
149+
return s.log
150+
}
146151

147152
// Write writes a byte slice to the log at the current position.
148153
// It increments the offset as well as sets the position to the new tail.
@@ -214,10 +219,10 @@ func (s *Segment) findEntry(offset int64) (e *Entry, err error) {
214219
s.Lock()
215220
defer s.Unlock()
216221
e = &Entry{}
217-
n := int(s.Index.bytes / entryWidth)
222+
n := int(s.Index.position) / entryWidth
218223
idx := sort.Search(n, func(i int) bool {
219224
_ = s.Index.ReadEntryAtFileOffset(e, int64(i*entryWidth))
220-
return e.Offset >= offset || e.Offset == 0
225+
return e.Offset >= offset
221226
})
222227
if idx == n {
223228
return nil, errors.New("entry not found")

commitlog/sendfile.go

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package commitlog
2+
3+
import (
4+
"log"
5+
"net"
6+
"os"
7+
"runtime"
8+
"syscall"
9+
)
10+
11+
const maxSendfileSize int = 4 << 20
12+
13+
func Sendfile(conn *net.TCPConn, file *os.File, offsetInt int64, size int, chunkSize int) (int, error) {
14+
offset := &offsetInt
15+
defer func() {
16+
runtime.KeepAlive(offset)
17+
}()
18+
written := 0
19+
var remain int = size
20+
n := chunkSize
21+
if chunkSize > maxSendfileSize {
22+
chunkSize = maxSendfileSize
23+
}
24+
src := int(file.Fd())
25+
rawConn, err := conn.SyscallConn()
26+
rawConn.Write(func(dst uintptr) bool {
27+
defer func() { log.Println("returned") }()
28+
for remain > 0 {
29+
if n > remain {
30+
n = remain
31+
}
32+
var err1 error
33+
log.Println("params:", n, "offset:", *offset)
34+
//todo: for bsd and darwin, pass different offset
35+
n, err1 = syscall.Sendfile(int(dst), src, offset, n)
36+
log.Println("after:", n, "offset:", *offset)
37+
if err1 != nil {
38+
log.Println("sent error:", err1.Error())
39+
}
40+
if n > 0 {
41+
written += n
42+
remain -= n
43+
} else if n == 0 && err1 == nil {
44+
break
45+
}
46+
if err1 == syscall.EAGAIN || err1 == syscall.EWOULDBLOCK {
47+
48+
if n == -1 {
49+
n = chunkSize
50+
}
51+
log.Println("got eagain")
52+
return false
53+
// waitpread, gopark
54+
}
55+
if err1 != nil {
56+
// This includes syscall.ENOSYS (no kernel
57+
// support) and syscall.EINVAL (fd types which
58+
// don't implement sendfile)
59+
err = err1
60+
break
61+
}
62+
}
63+
return true
64+
})
65+
log.Println("written", written)
66+
67+
return written, err
68+
69+
}

jocko/broker.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ func (b *Broker) Run(ctx context.Context, reqCtx *Context) *Context {
149149
case *protocol.ProduceRequest:
150150
res = b.handleProduce(reqCtx, req)
151151
case *protocol.FetchRequest:
152+
if b.config.UseSendfile {
153+
b.handleFetchSendFile(reqCtx, req)
154+
return nil
155+
}
152156
res = b.handleFetch(reqCtx, req)
153157
case *protocol.OffsetsRequest:
154158
res = b.handleOffsets(reqCtx, req)
@@ -196,7 +200,7 @@ func (b *Broker) Run(ctx context.Context, reqCtx *Context) *Context {
196200

197201
return &Context{
198202
parent: responseCtx,
199-
conn: reqCtx.conn,
203+
Conn: reqCtx.Conn,
200204
header: reqCtx.header,
201205
res: &protocol.Response{
202206
CorrelationID: reqCtx.header.CorrelationID,

jocko/commitlog.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package jocko
22

3-
import "io"
3+
import (
4+
"io"
5+
"os"
6+
)
47

58
type CommitLog interface {
69
Delete() error
@@ -9,4 +12,5 @@ type CommitLog interface {
912
NewestOffset() int64
1013
OldestOffset() int64
1114
Append([]byte) (int64, error)
15+
SendfileParams(offset int64, maxBytes int32) (*os.File, int64, int, error)
1216
}

jocko/config/config.go

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ type Config struct {
3131
LeaveDrainTime time.Duration
3232
ReconcileInterval time.Duration
3333
OffsetsTopicReplicationFactor int16
34+
UseSendfile bool
3435
}
3536

3637
// DefaultConfig creates/returns a default configuration.
@@ -48,6 +49,7 @@ func DefaultConfig() *Config {
4849
LeaveDrainTime: 5 * time.Second,
4950
ReconcileInterval: 60 * time.Second,
5051
OffsetsTopicReplicationFactor: 3,
52+
UseSendfile: true,
5153
}
5254

5355
conf.SerfLANConfig.ReconnectTimeout = 3 * 24 * time.Hour

jocko/context.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package jocko
33
import (
44
"context"
55
"fmt"
6-
"io"
6+
"net"
77
"sync"
88
"time"
99

@@ -12,7 +12,7 @@ import (
1212

1313
type Context struct {
1414
mu sync.Mutex
15-
conn io.ReadWriter
15+
Conn *net.TCPConn
1616
err error
1717
header *protocol.RequestHeader
1818
parent context.Context

jocko/fetch_sendfile.go

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
package jocko
2+
3+
import (
4+
"net"
5+
"time"
6+
7+
"github.com/travisjeffery/jocko/commitlog"
8+
"github.com/travisjeffery/jocko/protocol"
9+
)
10+
11+
func (b *Broker) handleFetchSendFile(ctx *Context, r *protocol.FetchRequest) {
12+
sp := span(ctx, b.tracer, "fetch")
13+
defer sp.Finish()
14+
resp := protocol.Response{
15+
CorrelationID: ctx.header.CorrelationID,
16+
}
17+
conn := ctx.Conn
18+
fres := &protocol.FetchResponse{
19+
Responses: make(protocol.FetchTopicResponses, len(r.Topics)),
20+
}
21+
msgSetLen := 0
22+
fres.APIVersion = r.APIVersion
23+
maxBufSize := 0
24+
// calculate total length of message set
25+
//TODO calc max buf length
26+
maxBufSize = 1024
27+
for i, topic := range r.Topics {
28+
fr := &protocol.FetchTopicResponse{
29+
Topic: topic.Topic,
30+
PartitionResponses: make([]*protocol.FetchPartitionResponse, len(topic.Partitions)),
31+
}
32+
for j, p := range topic.Partitions {
33+
fpres := &protocol.FetchPartitionResponse{}
34+
fpres.Partition = p.Partition
35+
replica, err := b.replicaLookup.Replica(topic.Topic, p.Partition)
36+
if err != nil {
37+
panic(err)
38+
}
39+
var rdrErr error
40+
fpres.FileHandle, fpres.SendOffset, fpres.SendSize, rdrErr = replica.Log.SendfileParams(p.FetchOffset, p.MaxBytes)
41+
if rdrErr != nil {
42+
panic(rdrErr)
43+
}
44+
msgSetLen += fpres.SendSize
45+
//get length of record
46+
//
47+
fr.PartitionResponses[j] = fpres
48+
}
49+
fres.Responses[i] = fr
50+
}
51+
lenEnc := new(protocol.LenEncoder)
52+
err := fres.Encode(lenEnc)
53+
if err != nil {
54+
panic(err)
55+
}
56+
// set length field
57+
resp.Size = int32(lenEnc.Length + msgSetLen)
58+
err = sendRes(&resp, maxBufSize, fres, conn)
59+
if err != nil {
60+
panic(err)
61+
}
62+
return
63+
}
64+
func sendRes(resp *protocol.Response,
65+
maxSize int,
66+
r *protocol.FetchResponse,
67+
conn *net.TCPConn) error {
68+
b := make([]byte, maxSize)
69+
e := protocol.NewByteEncoder(b)
70+
// outer response
71+
correlationIDSize := int32(4)
72+
e.PutInt32(resp.Size + correlationIDSize)
73+
e.PutInt32(resp.CorrelationID)
74+
//fetch response
75+
var err error
76+
if r.APIVersion >= 1 {
77+
e.PutInt32(int32(r.ThrottleTime / time.Millisecond))
78+
}
79+
80+
if err = e.PutArrayLength(len(r.Responses)); err != nil {
81+
return err
82+
}
83+
for _, response := range r.Responses {
84+
if err = e.PutString(response.Topic); err != nil {
85+
return err
86+
}
87+
if err = e.PutArrayLength(len(response.PartitionResponses)); err != nil {
88+
return err
89+
}
90+
for _, p := range response.PartitionResponses {
91+
if err = sendResOfPartition(conn, p, e, r.APIVersion); err != nil {
92+
return err
93+
}
94+
}
95+
}
96+
return nil
97+
}
98+
func sendResOfPartition(
99+
conn *net.TCPConn,
100+
r *protocol.FetchPartitionResponse,
101+
e *protocol.ByteEncoder,
102+
version int16) error {
103+
e.PutInt32(r.Partition)
104+
e.PutInt16(r.ErrorCode)
105+
e.PutInt64(r.HighWatermark)
106+
var err error
107+
if version >= 4 {
108+
e.PutInt64(r.LastStableOffset)
109+
110+
if err = e.PutArrayLength(len(r.AbortedTransactions)); err != nil {
111+
return err
112+
}
113+
for _, t := range r.AbortedTransactions {
114+
t.Encode(e)
115+
}
116+
}
117+
e.PutInt32(int32(r.SendSize))
118+
//log.Info.Println("encoder offset", e.GetOffset())
119+
conn.Write(e.Bytes()[:e.GetOffset()])
120+
e.SetOffset(0)
121+
chunkSize := 4096
122+
if _, err = commitlog.Sendfile(conn, r.FileHandle, r.SendOffset, r.SendSize, chunkSize); err != nil {
123+
return err
124+
}
125+
126+
return nil
127+
}

0 commit comments

Comments
 (0)