Skip to content

Commit 4ba4e88

Browse files
committed
Stream WriteTo directly to writer and fix ZIP64 local file header sizes
WriteTo streaming: - Stream ZIP archive directly to the destination writer instead of buffering the entire compressed output in a bytes.Buffer. For large workbooks this avoids 50-200 MB+ of peak memory allocation. - For encrypted files, use a temporary file instead of an in-memory buffer to reduce peak memory during the encrypt-then-write cycle. - Add countWriter to track bytes written for the int64 return value. - Use CopyTo instead of Reader+io.Copy in writeToZip for streaming worksheets, reducing syscalls via larger read buffers. ZIP64 local file header fix: - Set compressed and uncompressed size fields in the Local File Header to 0xFFFFFFFF for ZIP64 entries. The Go standard library only patches the Central Directory version to 45 for entries >4GB but leaves the LFH sizes at their original values. Excel strictly validates that ZIP64 LFH entries have sizes set to 0xFFFFFFFF with the actual sizes in the ZIP64 extended information extra field. Without this fix, workbooks containing sheets larger than 4GB are reported by Excel as corrupted. - Only perform the ZIP64 fixup when zip64Entries is non-empty, avoiding unnecessary scanning of the output buffer for most workbooks. - Add writeZip64LFHFile for file-based fixup (used by encryption path).
1 parent b5fe105 commit 4ba4e88

1 file changed

Lines changed: 170 additions & 10 deletions

File tree

file.go

Lines changed: 170 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,10 @@ func (f *File) Write(w io.Writer, opts ...Options) error {
113113
return err
114114
}
115115

116-
// WriteTo implements io.WriterTo to write the file.
116+
// WriteTo implements io.WriterTo to write the file. When no password
117+
// encryption is required, the ZIP archive is streamed directly to w without
118+
// buffering the entire compressed output in memory. When password encryption
119+
// is required, a temporary file is used to reduce memory usage.
117120
func (f *File) WriteTo(w io.Writer, opts ...Options) (int64, error) {
118121
for i := range opts {
119122
f.options = &opts[i]
@@ -127,17 +130,91 @@ func (f *File) WriteTo(w io.Writer, opts ...Options) (int64, error) {
127130
return 0, err
128131
}
129132
}
130-
buf, err := f.WriteToBuffer()
133+
// Password encryption requires post-processing the entire output.
134+
// Use a temporary file to reduce peak memory usage.
135+
if f.options != nil && f.options.Password != "" {
136+
return f.writeToWithEncryption(w)
137+
}
138+
// Stream the ZIP directly to w. This avoids holding the full compressed
139+
// archive in a bytes.Buffer, which can be 50-200 MB+ for large reports.
140+
cw := &countWriter{w: w}
141+
f.zip64Entries = nil
142+
zw := f.ZipWriter(cw)
143+
if err := f.writeToZip(zw); err != nil {
144+
_ = zw.Close()
145+
return cw.n, err
146+
}
147+
return cw.n, zw.Close()
148+
}
149+
150+
// writeToWithEncryption writes an encrypted file using a temporary file to
151+
// reduce memory usage.
152+
func (f *File) writeToWithEncryption(w io.Writer) (int64, error) {
153+
var tmpDir string
154+
if f.options != nil {
155+
tmpDir = f.options.TmpDir
156+
}
157+
tmpFile, err := os.CreateTemp(tmpDir, "excelize-encrypt-*.zip")
158+
if err != nil {
159+
return 0, err
160+
}
161+
tmpPath := tmpFile.Name()
162+
defer func() {
163+
_ = tmpFile.Close()
164+
_ = os.Remove(tmpPath)
165+
}()
166+
167+
f.zip64Entries = nil
168+
zw := f.ZipWriter(tmpFile)
169+
if err := f.writeToZip(zw); err != nil {
170+
_ = zw.Close()
171+
return 0, err
172+
}
173+
if err := zw.Close(); err != nil {
174+
return 0, err
175+
}
176+
177+
if len(f.zip64Entries) > 0 {
178+
if err := f.writeZip64LFHFile(tmpFile); err != nil {
179+
return 0, err
180+
}
181+
}
182+
183+
if _, err := tmpFile.Seek(0, 0); err != nil {
184+
return 0, err
185+
}
186+
rawZip, err := io.ReadAll(tmpFile)
187+
if err != nil {
188+
return 0, err
189+
}
190+
191+
encrypted, err := Encrypt(rawZip, f.options)
131192
if err != nil {
132193
return 0, err
133194
}
134-
return buf.WriteTo(w)
195+
n, err := w.Write(encrypted)
196+
return int64(n), err
197+
}
198+
199+
// countWriter is an io.Writer decorator that forwards every write to the
// wrapped writer w and accumulates, in n, the number of bytes the wrapped
// writer reported as written.
type countWriter struct {
	w io.Writer // destination that receives the data
	n int64     // running total of bytes accepted by w
}

// Write forwards p to the wrapped writer and adds the reported byte count to
// the running total, including on a short write. The wrapped writer's result
// is returned unchanged.
func (cw *countWriter) Write(p []byte) (written int, err error) {
	written, err = cw.w.Write(p)
	cw.n += int64(written)
	return written, err
}
136210

137211
// WriteToBuffer provides a function to get bytes.Buffer from the saved file,
138212
// and it allocates space in memory. Be careful when the file size is large.
213+
// Consider using WriteTo with a file for large password-protected files to
214+
// reduce memory usage.
139215
func (f *File) WriteToBuffer() (*bytes.Buffer, error) {
140216
buf := new(bytes.Buffer)
217+
f.zip64Entries = nil
141218
zw := f.ZipWriter(buf)
142219

143220
if err := f.writeToZip(zw); err != nil {
@@ -147,7 +224,11 @@ func (f *File) WriteToBuffer() (*bytes.Buffer, error) {
147224
if err := zw.Close(); err != nil {
148225
return buf, err
149226
}
150-
err := f.writeZip64LFH(buf)
227+
// Only perform ZIP64 fixup if we actually have ZIP64 entries
228+
var err error
229+
if len(f.zip64Entries) > 0 {
230+
err = f.writeZip64LFH(buf)
231+
}
151232
if f.options != nil && f.options.Password != "" {
152233
b, err := Encrypt(buf.Bytes(), f.options)
153234
if err != nil {
@@ -180,13 +261,9 @@ func (f *File) writeToZip(zw ZipWriter) error {
180261
if err != nil {
181262
return err
182263
}
183-
var from io.Reader
184-
if from, err = stream.rawData.Reader(); err != nil {
185-
_ = stream.rawData.Close()
186-
return err
187-
}
188-
written, err := io.Copy(fi, from)
264+
written, err := stream.rawData.CopyTo(fi)
189265
if err != nil {
266+
_ = stream.rawData.Close()
190267
return err
191268
}
192269
if written > math.MaxUint32 {
@@ -267,8 +344,91 @@ func (f *File) writeZip64LFH(buf *bytes.Buffer) error {
267344
}
268345
if inStrSlice(f.zip64Entries, string(data[idx+30:idx+30+filenameLen]), true) != -1 {
269346
binary.LittleEndian.PutUint16(data[idx+4:idx+6], 45)
347+
// Set compressed and uncompressed sizes to 0xFFFFFFFF to indicate
348+
// that the actual sizes are in the ZIP64 extended information field.
349+
// Without this, readers see size=0 or a truncated 32-bit value
350+
// which causes corruption errors.
351+
binary.LittleEndian.PutUint32(data[idx+18:idx+22], 0xFFFFFFFF)
352+
binary.LittleEndian.PutUint32(data[idx+22:idx+26], 0xFFFFFFFF)
270353
}
271354
offset = idx + 1
272355
}
273356
return nil
274357
}
358+
359+
// writeZip64LFHFile performs ZIP64 local file header fixup on a file.
360+
// This is used when encrypting to avoid loading the entire file into memory.
361+
func (f *File) writeZip64LFHFile(file *os.File) error {
362+
if len(f.zip64Entries) == 0 {
363+
return nil
364+
}
365+
if _, err := file.Seek(0, 0); err != nil {
366+
return err
367+
}
368+
info, err := file.Stat()
369+
if err != nil {
370+
return err
371+
}
372+
fileSize := info.Size()
373+
374+
const chunkSize = 1024 * 1024 // 1MB chunks
375+
buf := make([]byte, chunkSize)
376+
var offset int64
377+
378+
for offset < fileSize {
379+
n, err := file.ReadAt(buf, offset)
380+
if err != nil && err != io.EOF {
381+
return err
382+
}
383+
if n == 0 {
384+
break
385+
}
386+
387+
searchBuf := buf[:n]
388+
searchOffset := 0
389+
for searchOffset < n {
390+
idx := bytes.Index(searchBuf[searchOffset:], []byte{0x50, 0x4b, 0x03, 0x04})
391+
if idx == -1 {
392+
break
393+
}
394+
idx += searchOffset
395+
absoluteIdx := offset + int64(idx)
396+
397+
if idx+30 > n {
398+
break
399+
}
400+
401+
filenameLen := int(binary.LittleEndian.Uint16(searchBuf[idx+26 : idx+28]))
402+
if idx+30+filenameLen > n {
403+
break
404+
}
405+
406+
filename := string(searchBuf[idx+30 : idx+30+filenameLen])
407+
if inStrSlice(f.zip64Entries, filename, true) != -1 {
408+
// Update version
409+
versionBuf := make([]byte, 2)
410+
binary.LittleEndian.PutUint16(versionBuf, 45)
411+
if _, err := file.WriteAt(versionBuf, absoluteIdx+4); err != nil {
412+
return err
413+
}
414+
// Set compressed and uncompressed sizes to 0xFFFFFFFF
415+
sizeBuf := make([]byte, 4)
416+
binary.LittleEndian.PutUint32(sizeBuf, 0xFFFFFFFF)
417+
if _, err := file.WriteAt(sizeBuf, absoluteIdx+18); err != nil {
418+
return err
419+
}
420+
if _, err := file.WriteAt(sizeBuf, absoluteIdx+22); err != nil {
421+
return err
422+
}
423+
}
424+
searchOffset = idx + 1
425+
}
426+
427+
offset += int64(n)
428+
if offset < fileSize {
429+
offset -= 30
430+
}
431+
}
432+
433+
return nil
434+
}

0 commit comments

Comments
 (0)