Skip to content

Commit 0b4c75e

Browse files
committed
Add support for passing context
When serving data from a remote location, one might need to pass the request context to the backend storage of entries, for example to make distributed tracing work.
1 parent bc5258f commit 0b4c75e

File tree

8 files changed

+424
-60
lines changed

8 files changed

+424
-60
lines changed

README.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@ zipserve
88
Package zipserve implements serving virtual zip archives over HTTP,
99
with support for range queries and resumable downloads. Zipserve keeps only the
1010
archive headers in memory (similar to archive/zip when streaming).
11-
The actual file data is fetched on demand from user-provided ReaderAt,
12-
so the file data can be fetched from a remote location.
13-
Zipserve needs to know CRC32 of the uncompressed data, compressed and uncompressed size of files in advance,
14-
which must be supplied by the user.
11+
Zipserve fetches file data on demand from a user-provided `io.ReaderAt` or `zipserve.ReaderAt`,
12+
so the file data can be fetched from a remote location.
13+
`zipserve.ReaderAt` supports passing request context to the backing store.
14+
15+
The user has to provide the CRC32 of the uncompressed data, as well as the compressed and uncompressed sizes of the files, in advance.
16+
These can be computed, for example, during file uploads.
1517

1618
Differences to archive/zip
1719
--------------------------
@@ -35,8 +37,7 @@ so there aren't many commits. I update the module when a new version of Go is re
3537
License
3638
-------
3739

38-
Three clause BSD (same as Go) for files in this package (see [LICENSE](LICENSE)),
39-
Apache 2.0 for readerutil package from go4.org which is used as a dependency.
40+
Three clause BSD (same as Go), see [LICENSE](LICENSE).
4041

4142
Alternatives
4243
------------

archive.go

Lines changed: 42 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ package zipserve
1717

1818
import (
1919
"bytes"
20+
"context"
2021
"crypto/md5"
2122
"encoding/binary"
2223
"encoding/hex"
2324
"errors"
2425
"fmt"
25-
"go4.org/readerutil"
2626
"io"
2727
"net/http"
2828
"strings"
@@ -34,6 +34,9 @@ type Template struct {
3434
// Prefix is the content at the beginning of the file before ZIP entries.
3535
//
3636
// It may be used to create self-extracting archives, for example.
37+
//
38+
// Prefix may implement the ReaderAt interface from this package; in that case,
38+
// Prefix's ReadAtContext method will be called instead of ReadAt.
3740
Prefix io.ReaderAt
3841

3942
// PrefixSize is size of Prefix in bytes.
@@ -54,25 +57,11 @@ type Template struct {
5457
CreateTime time.Time
5558
}
5659

57-
type partsBuilder struct {
58-
parts []readerutil.SizeReaderAt
59-
offset int64
60-
}
61-
62-
func (pb *partsBuilder) add(r readerutil.SizeReaderAt) {
63-
size := r.Size()
64-
if size == 0 {
65-
return
66-
}
67-
pb.parts = append(pb.parts, r)
68-
pb.offset += size
69-
}
70-
7160
// Archive represents the ZIP file data to be downloaded by the user.
7261
//
7362
// It is a ReaderAt, so it allows concurrent access to different byte ranges of the archive.
7463
type Archive struct {
75-
data readerutil.SizeReaderAt
64+
parts multiReaderAt
7665
createTime time.Time
7766
etag string
7867
}
@@ -89,9 +78,9 @@ func NewArchive(t *Template) (*Archive, error) {
8978
return newArchive(t, bufferView, nil)
9079
}
9180

92-
type bufferViewFunc func(content func(w io.Writer) error) (readerutil.SizeReaderAt, error)
81+
type bufferViewFunc func(content func(w io.Writer) error) (sizeReaderAt, error)
9382

94-
func bufferView(content func(w io.Writer) error) (readerutil.SizeReaderAt, error) {
83+
func bufferView(content func(w io.Writer) error) (sizeReaderAt, error) {
9584
var buf bytes.Buffer
9685

9786
err := content(&buf)
@@ -101,17 +90,24 @@ func bufferView(content func(w io.Writer) error) (readerutil.SizeReaderAt, error
10190
return bytes.NewReader(buf.Bytes()), nil
10291
}
10392

93+
func readerAt(r io.ReaderAt) ReaderAt {
94+
if v, ok := r.(ReaderAt); ok {
95+
return v
96+
}
97+
return ignoreContext{r: r}
98+
}
99+
104100
func newArchive(t *Template, view bufferViewFunc, testHookCloseSizeOffset func(size, offset uint64)) (*Archive, error) {
105101
if len(t.Comment) > uint16max {
106102
return nil, errors.New("comment too long")
107103
}
108104

105+
ar := new(Archive)
109106
dir := make([]*header, 0, len(t.Entries))
110-
var pb partsBuilder
111107
etagHash := md5.New()
112108

113109
if t.Prefix != nil {
114-
pb.add(&addsize{size: t.PrefixSize, source: t.Prefix})
110+
ar.parts.add(readerAt(t.Prefix), t.PrefixSize)
115111

116112
var buf [8]byte
117113
binary.LittleEndian.PutUint64(buf[:], uint64(t.PrefixSize))
@@ -122,28 +118,28 @@ func newArchive(t *Template, view bufferViewFunc, testHookCloseSizeOffset func(s
122118

123119
for _, entry := range t.Entries {
124120
prepareEntry(entry)
125-
dir = append(dir, &header{FileHeader: entry, offset: uint64(pb.offset)})
121+
dir = append(dir, &header{FileHeader: entry, offset: uint64(ar.parts.size)})
126122
header, err := view(func(w io.Writer) error {
127123
return writeHeader(w, entry)
128124
})
129125
if err != nil {
130126
return nil, err
131127
}
132-
pb.add(header)
128+
ar.parts.addSizeReaderAt(header)
133129
io.Copy(etagHash, io.NewSectionReader(header, 0, header.Size()))
134130
if strings.HasSuffix(entry.Name, "/") {
135131
if entry.Content != nil {
136132
return nil, errors.New("directory entry non-nil content")
137133
}
138134
} else {
139135
if entry.Content != nil {
140-
pb.add(&addsize{size: int64(entry.CompressedSize64), source: entry.Content})
136+
ar.parts.add(readerAt(entry.Content), int64(entry.CompressedSize64))
141137
} else if entry.CompressedSize64 != 0 {
142138
return nil, errors.New("empty entry with nonzero length")
143139
}
144140
// data descriptor
145141
dataDescriptor := makeDataDescriptor(entry)
146-
pb.add(bytes.NewReader(dataDescriptor))
142+
ar.parts.addSizeReaderAt(bytes.NewReader(dataDescriptor))
147143
etagHash.Write(dataDescriptor)
148144
}
149145
if entry.Modified.After(maxTime) {
@@ -153,37 +149,46 @@ func newArchive(t *Template, view bufferViewFunc, testHookCloseSizeOffset func(s
153149

154150
// capture central directory offset and comment so that content func for central directory
155151
// may be called multiple times and we don't store reference to t in the closure
156-
centralDirectoryOffset := pb.offset
152+
centralDirectoryOffset := ar.parts.size
157153
comment := t.Comment
158154
centralDirectory, err := view(func(w io.Writer) error {
159155
return writeCentralDirectory(centralDirectoryOffset, dir, w, comment, testHookCloseSizeOffset)
160156
})
161157
if err != nil {
162158
return nil, err
163159
}
164-
pb.add(centralDirectory)
160+
ar.parts.addSizeReaderAt(centralDirectory)
165161
io.Copy(etagHash, io.NewSectionReader(centralDirectory, 0, centralDirectory.Size()))
166162

167-
createTime := t.CreateTime
168-
if createTime.IsZero() {
169-
createTime = maxTime
163+
ar.createTime = t.CreateTime
164+
if ar.createTime.IsZero() {
165+
ar.createTime = maxTime
170166
}
171167

172-
etag := fmt.Sprintf("\"%s\"", hex.EncodeToString(etagHash.Sum(nil)))
168+
ar.etag = fmt.Sprintf("\"%s\"", hex.EncodeToString(etagHash.Sum(nil)))
173169

174-
return &Archive{
175-
data: readerutil.NewMultiReaderAt(pb.parts...),
176-
createTime: createTime,
177-
etag: etag}, nil
170+
return ar, nil
178171
}
179172

180173
// Size returns the size of the archive in bytes.
181-
func (ar *Archive) Size() int64 { return ar.data.Size() }
174+
func (ar *Archive) Size() int64 { return ar.parts.Size() }
182175

183176
// ReadAt provides the data of the file.
184177
//
178+
// This is the same as calling ReadAtContext with context.TODO().
179+
//
185180
// See io.ReaderAt for the interface.
186-
func (ar *Archive) ReadAt(p []byte, off int64) (int, error) { return ar.data.ReadAt(p, off) }
181+
func (ar *Archive) ReadAt(p []byte, off int64) (int, error) { return ar.parts.ReadAtContext(context.TODO(), p, off) }
182+
183+
// ReadAtContext provides the data of the file.
184+
//
185+
// This methods implements ReaderAt interface.
186+
//
187+
// The context is passed to ReadAtContext of individual entries, if they implement it. The context is ignored if an
188+
// entry implements just io.ReaderAt.
189+
func (ar *Archive) ReadAtContext(ctx context.Context, p []byte, off int64) (int, error) {
190+
return ar.parts.ReadAtContext(ctx, p, off)
191+
}
187192

188193
// ServeHTTP serves the archive over HTTP.
189194
//
@@ -202,14 +207,6 @@ func (ar *Archive) ServeHTTP(w http.ResponseWriter, r *http.Request) {
202207
w.Header().Set("Etag", ar.etag)
203208
}
204209

205-
readseeker := io.NewSectionReader(ar.data, 0, ar.data.Size())
210+
readseeker := io.NewSectionReader(withContext{r: &ar.parts, ctx: r.Context()}, 0, ar.parts.Size())
206211
http.ServeContent(w, r, "", ar.createTime, readseeker)
207212
}
208-
209-
type addsize struct {
210-
size int64
211-
source io.ReaderAt
212-
}
213-
214-
func (as *addsize) Size() int64 { return as.size }
215-
func (as *addsize) ReadAt(p []byte, off int64) (int, error) { return as.source.ReadAt(p, off) }

go.mod

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
11
module github.com/martin-sucha/zipserve
22

33
go 1.12
4-
5-
require go4.org v0.0.0-20180417224846-9599cf28b011

go.sum

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +0,0 @@
1-
go4.org v0.0.0-20180417224846-9599cf28b011 h1:i0QTVNl3j6yciHiQIHxz+mnsSQqo/xi78EGN7yNpMVw=
2-
go4.org v0.0.0-20180417224846-9599cf28b011/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1yOyC1qaOBpL57BhE=

io.go

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
package zipserve
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
"sort"
8+
)
9+
10+
// ReaderAt is the context-aware counterpart of io.ReaderAt.
type ReaderAt interface {
	// ReadAtContext follows the semantics of io.ReaderAt's ReadAt and
	// additionally receives the context of the read.
	ReadAtContext(ctx context.Context, p []byte, off int64) (n int, err error)
}
15+
16+
// sizeReaderAt is an io.ReaderAt that also reports its total length.
type sizeReaderAt interface {
	// Size returns the number of bytes available through ReadAt.
	Size() int64
	io.ReaderAt
}
20+
21+
type offsetAndData struct {
22+
offset int64
23+
data ReaderAt
24+
}
25+
26+
// multiReaderAt is a ReaderAt that joins multiple ReaderAt sequentially together.
27+
type multiReaderAt struct {
28+
parts []offsetAndData
29+
size int64
30+
}
31+
32+
// add a part to the multiContextReader.
33+
// add can be used only before the reader is read from.
34+
func (mcr *multiReaderAt) add(data ReaderAt, size int64) {
35+
switch {
36+
case size < 0:
37+
panic(fmt.Sprintf("size cannot be negative: %v", size))
38+
case size == 0:
39+
return
40+
}
41+
mcr.parts = append(mcr.parts, offsetAndData{
42+
offset: mcr.size,
43+
data: data,
44+
})
45+
mcr.size += size
46+
}
47+
48+
// addSizeReaderAt is like add, but takes sizeReaderAt
49+
func (mcr *multiReaderAt) addSizeReaderAt(r sizeReaderAt) {
50+
mcr.add(ignoreContext{r: r}, r.Size())
51+
}
52+
53+
// endOffset is offset where the given part ends.
54+
func (mcr *multiReaderAt) endOffset(partIndex int) int64 {
55+
if partIndex == len(mcr.parts)-1 {
56+
return mcr.size
57+
}
58+
return mcr.parts[partIndex+1].offset
59+
}
60+
61+
func (mcr *multiReaderAt) ReadAtContext(ctx context.Context, p []byte, off int64) (n int, err error) {
62+
if len(p) == 0 {
63+
return 0, nil
64+
}
65+
if off >= mcr.size {
66+
return 0, io.EOF
67+
}
68+
// find first part that has data for p
69+
firstPartIndex := sort.Search(len(mcr.parts), func(i int) bool {
70+
return mcr.endOffset(i) > off
71+
})
72+
for partIndex := firstPartIndex; partIndex < len(mcr.parts) && len(p) > 0; partIndex++ {
73+
if partIndex > firstPartIndex {
74+
off = mcr.parts[partIndex].offset
75+
}
76+
partRemainingBytes := mcr.endOffset(partIndex) - off
77+
sizeToRead := int64(len(p))
78+
if sizeToRead > partRemainingBytes {
79+
sizeToRead = partRemainingBytes
80+
}
81+
n2, err2 := mcr.parts[partIndex].data.ReadAtContext(ctx, p[0:sizeToRead], off - mcr.parts[partIndex].offset)
82+
n += n2
83+
if err2 != nil {
84+
return n, err2
85+
}
86+
p = p[n2:]
87+
}
88+
if len(p) > 0 {
89+
// tried reading beyond size
90+
return n, io.EOF
91+
}
92+
return n, nil
93+
}
94+
95+
func (mcr *multiReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
96+
return mcr.ReadAtContext(context.TODO(), p, off)
97+
}
98+
99+
func (mcr *multiReaderAt) Size() int64 {
100+
return mcr.size
101+
}
102+
103+
// ignoreContext adapts an io.ReaderAt to the ReaderAt interface by discarding
// the context.
type ignoreContext struct {
	// r is the wrapped reader that performs the actual reads.
	r io.ReaderAt
}

// ReadAtContext calls ReadAt on the wrapped reader; the context is not used.
func (a ignoreContext) ReadAtContext(_ context.Context, p []byte, off int64) (n int, err error) {
	n, err = a.r.ReadAt(p, off)
	return n, err
}
111+
112+
// withContext converts ReaderAt to io.ReaderAt.
113+
//
114+
// While usually we shouldn't store context in a structure, we ensure that withContext lives only within single
115+
// request.
116+
type withContext struct {
117+
ctx context.Context
118+
r ReaderAt
119+
}
120+
121+
func (w withContext) ReadAt(p []byte, off int64) (n int, err error) {
122+
return w.r.ReadAtContext(w.ctx, p, off)
123+
}

0 commit comments

Comments
 (0)