Skip to content

Commit b5fe105

Browse files
committed
Enhance bufferedWriter with bufio.Writer, scratch space, and configurable thresholds
- Add bufio.Writer layer after temp file threshold is crossed, reducing disk write syscalls from per-Write to per-buffer-full (default 128 KiB)
- Add scratch [24]byte field for strconv.Append* to avoid heap allocations in WriteInt, WriteUint, and WriteFloat helper methods
- Track total bytes written for offset-based operations (WriteAt)
- Add WriteAt method for updating data at specific offsets in both in-memory and temp file modes
- Add CopyTo method with large read buffer to minimize Pread syscalls when copying data to ZIP writers
- Add Bytes method for direct access to in-memory buffer contents
- Add Reset method for clearing state without closing the temp file
- Add StreamingChunkSize option to control when streaming spills to disk (0 = default 16 MiB, -1 = never spill, keeps all data in memory)
- Add StreamingBufSize option to control bufio.Writer size after the StreamingChunkSize threshold is crossed (0 = default 128 KiB)
- (Final item truncated in this capture; the surviving text refers to peak memory usage during streaming writes.)
1 parent 4bebb61 commit b5fe105

3 files changed

Lines changed: 181 additions & 15 deletions

File tree

excelize.go

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,20 @@ type Options struct {
122122
LongDatePattern string
123123
LongTimePattern string
124124
CultureInfo CultureName
125+
// StreamingChunkSize is the number of bytes of XML data accumulated in
126+
// memory before a streaming worksheet spills to a temp file. A smaller
127+
// value reduces peak memory usage at the cost of more disk I/O. Zero
128+
// means use the default (StreamChunkSize = 16 MiB). Set to -1 to
129+
// disable temp files entirely (all data stays in memory); this
130+
// eliminates disk I/O overhead and can be significantly faster when
131+
// sufficient memory is available.
132+
StreamingChunkSize int
133+
// StreamingBufSize is the size of the bufio.Writer used for all disk
134+
// writes after the StreamingChunkSize threshold is crossed. Larger values
135+
// reduce write syscall counts at the cost of slightly more memory. The
136+
// measured inflection point on NVMe and HDD alike is 128 KiB. Zero means
137+
// use the default (StreamingBufSizeDefault = 128 KiB).
138+
StreamingBufSize int
125139
}
126140

127141
// OpenFile takes the name of a spreadsheet file and returns a populated

stream.go

Lines changed: 166 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212
package excelize
1313

1414
import (
15+
"bufio"
1516
"bytes"
1617
"encoding/xml"
1718
"fmt"
1819
"io"
20+
"math"
1921
"os"
2022
"reflect"
2123
"strconv"
@@ -119,11 +121,26 @@ func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) {
119121
if sheetID == -1 {
120122
return nil, ErrSheetNotExist{sheet}
121123
}
124+
chunkSize := f.options.StreamingChunkSize
125+
switch {
126+
case chunkSize < 0:
127+
chunkSize = math.MaxInt // never spill to disk
128+
case chunkSize == 0:
129+
chunkSize = StreamChunkSize
130+
}
131+
bufSize := f.options.StreamingBufSize
132+
if bufSize <= 0 {
133+
bufSize = StreamingBufSizeDefault
134+
}
122135
sw := &StreamWriter{
123136
file: f,
124137
Sheet: sheet,
125138
SheetID: sheetID,
126-
rawData: bufferedWriter{tmpDir: f.options.TmpDir},
139+
rawData: bufferedWriter{
140+
tmpDir: f.options.TmpDir,
141+
flushSize: chunkSize,
142+
bioSize: bufSize,
143+
},
127144
}
128145
var err error
129146
sw.worksheet, err = f.workSheetReader(sheet)
@@ -791,19 +808,126 @@ func bulkAppendFields(w io.Writer, ws *xlsxWorksheet, from, to int) {
791808
// is written to the temp file with Sync, which may return an error.
792809
// Therefore, Sync should be periodically called and the error checked.
793810
type bufferedWriter struct {
794-
tmpDir string
795-
tmp *os.File
796-
buf bytes.Buffer
811+
tmpDir string
812+
tmp *os.File
813+
buf bytes.Buffer // used before temp file is created
814+
bio *bufio.Writer // used after temp file is created
815+
scratch [24]byte // scratch space for strconv.Append* to avoid heap allocs
816+
flushSize int // if >0, flush to temp file at this threshold instead of StreamChunkSize
817+
bioSize int // bufio.Writer buffer size after threshold; 0 = use StreamingBufSizeDefault
818+
written int64 // total bytes written (for tracking offsets)
797819
}
798820

799-
// Write to the in-memory buffer. The error is always nil.
821+
// Write to the active writer (bufio if streaming, otherwise in-memory buffer).
800822
func (bw *bufferedWriter) Write(p []byte) (n int, err error) {
801-
return bw.buf.Write(p)
823+
if bw.bio != nil {
824+
n, err = bw.bio.Write(p)
825+
} else {
826+
n, err = bw.buf.Write(p)
827+
}
828+
bw.written += int64(n)
829+
return
802830
}
803831

804-
// WriteString write to the in-memory buffer. The error is always nil.
832+
// WriteString writes to the active writer.
805833
func (bw *bufferedWriter) WriteString(p string) (n int, err error) {
806-
return bw.buf.WriteString(p)
834+
if bw.bio != nil {
835+
n, err = bw.bio.WriteString(p)
836+
} else {
837+
n, err = bw.buf.WriteString(p)
838+
}
839+
bw.written += int64(n)
840+
return
841+
}
842+
843+
// WriteInt formats and writes an int64 directly using the scratch space,
844+
// avoiding a heap-allocated string.
845+
func (bw *bufferedWriter) WriteInt(v int64) {
846+
b := strconv.AppendInt(bw.scratch[:0], v, 10)
847+
if bw.bio != nil {
848+
bw.bio.Write(b)
849+
} else {
850+
bw.buf.Write(b)
851+
}
852+
bw.written += int64(len(b))
853+
}
854+
855+
// WriteUint formats and writes a uint64 directly to the buffer.
856+
func (bw *bufferedWriter) WriteUint(v uint64) {
857+
b := strconv.AppendUint(bw.scratch[:0], v, 10)
858+
if bw.bio != nil {
859+
bw.bio.Write(b)
860+
} else {
861+
bw.buf.Write(b)
862+
}
863+
bw.written += int64(len(b))
864+
}
865+
866+
// WriteFloat formats and writes a float64 directly to the buffer.
867+
func (bw *bufferedWriter) WriteFloat(v float64, fmt byte, prec, bitSize int) {
868+
b := strconv.AppendFloat(bw.scratch[:0], v, fmt, prec, bitSize)
869+
if bw.bio != nil {
870+
bw.bio.Write(b)
871+
} else {
872+
bw.buf.Write(b)
873+
}
874+
bw.written += int64(len(b))
875+
}
876+
877+
// Bytes returns the in-memory buffer contents. This is only valid when no
878+
// temp file has been created (i.e. for small worksheets). Once streaming to
879+
// disk, this returns nil.
880+
func (bw *bufferedWriter) Bytes() []byte {
881+
if bw.tmp != nil {
882+
return nil
883+
}
884+
return bw.buf.Bytes()
885+
}
886+
887+
// WriteAt writes data at a specific offset. For in-memory buffers, it
888+
// modifies the buffer directly. For temp files, it flushes first then
889+
// uses pwrite. The data must fit within previously written bytes.
890+
func (bw *bufferedWriter) WriteAt(p []byte, offset int64) error {
891+
if bw.tmp == nil {
892+
// In-memory: directly modify buffer bytes
893+
buf := bw.buf.Bytes()
894+
if offset+int64(len(p)) > int64(len(buf)) {
895+
return fmt.Errorf("WriteAt: offset %d + len %d exceeds buffer size %d", offset, len(p), len(buf))
896+
}
897+
copy(buf[offset:], p)
898+
return nil
899+
}
900+
// Temp file: flush bufio first, then use pwrite
901+
if err := bw.Flush(); err != nil {
902+
return err
903+
}
904+
_, err := bw.tmp.WriteAt(p, offset)
905+
return err
906+
}
907+
908+
// CopyTo efficiently copies all buffered data to w. For in-memory buffers
909+
// this is a simple WriteTo. For temp files this uses a large read buffer to
910+
// minimize syscalls.
911+
func (bw *bufferedWriter) CopyTo(w io.Writer) (int64, error) {
912+
if bw.tmp == nil {
913+
return io.Copy(w, bytes.NewReader(bw.buf.Bytes()))
914+
}
915+
if err := bw.Flush(); err != nil {
916+
return 0, err
917+
}
918+
if _, err := bw.tmp.Seek(0, 0); err != nil {
919+
return 0, err
920+
}
921+
// Use a large read buffer to batch Pread syscalls. Without this,
922+
// io.Copy uses 32 KB reads, generating thousands of syscalls for
923+
// large worksheets (e.g. 100 MB XML → 3000+ syscalls). A 256 KB
924+
// buffer reduces that to ~400.
925+
readBufSize := 256 * 1024
926+
if bw.bioSize > readBufSize {
927+
readBufSize = bw.bioSize
928+
}
929+
br := bufio.NewReaderSize(bw.tmp, readBufSize)
930+
return io.Copy(w, br)
807931
}
808932

809933
// Reader provides read-access to the underlying buffer/file.
@@ -823,10 +947,16 @@ func (bw *bufferedWriter) Reader() (io.Reader, error) {
823947
}
824948

825949
// Sync will write the in-memory buffer to a temp file, if the in-memory
826-
// buffer has grown large enough. Any error will be returned.
950+
// buffer has grown large enough. Once the temp file is created, all
951+
// subsequent writes go through the bufio.Writer and the bytes.Buffer is
952+
// released.
827953
func (bw *bufferedWriter) Sync() (err error) {
828-
// Try to use local storage
829-
if bw.buf.Len() < StreamChunkSize {
954+
// Already streaming to disk via bufio.Writer — nothing to do here;
955+
// the final Flush() call will drain any remaining bytes.
956+
if bw.bio != nil {
957+
return nil
958+
}
959+
if bw.buf.Len() < bw.flushSize {
830960
return nil
831961
}
832962
if bw.tmp == nil {
@@ -836,26 +966,47 @@ func (bw *bufferedWriter) Sync() (err error) {
836966
return nil
837967
}
838968
}
839-
return bw.Flush()
969+
// Drain the in-memory buffer to the temp file
970+
if _, err = bw.buf.WriteTo(bw.tmp); err != nil {
971+
return err
972+
}
973+
// Release the bytes.Buffer backing array entirely
974+
bw.buf = bytes.Buffer{}
975+
// Switch to bufio.Writer for all future writes
976+
bw.bio = bufio.NewWriterSize(bw.tmp, bw.bioSize)
977+
return nil
840978
}
841979

842-
// Flush the entire in-memory buffer to the temp file, if a temp file is being
843-
// used.
980+
// Flush ensures all buffered data is written to the temp file.
844981
func (bw *bufferedWriter) Flush() error {
845982
if bw.tmp == nil {
846983
return nil
847984
}
985+
if bw.bio != nil {
986+
return bw.bio.Flush()
987+
}
848988
_, err := bw.buf.WriteTo(bw.tmp)
849989
if err != nil {
850990
return err
851991
}
852-
bw.buf.Reset()
992+
bw.buf = bytes.Buffer{}
853993
return nil
854994
}
855995

996+
// Reset clears all buffered data (in-memory and bufio) without closing the
997+
// temp file.
998+
func (bw *bufferedWriter) Reset() {
999+
bw.buf.Reset()
1000+
if bw.bio != nil {
1001+
bw.bio.Reset(&bw.buf) // detach from temp file
1002+
bw.bio = nil
1003+
}
1004+
}
1005+
8561006
// Close the underlying temp file and reset the in-memory buffer.
8571007
func (bw *bufferedWriter) Close() error {
8581008
bw.buf.Reset()
1009+
bw.bio = nil
8591010
if bw.tmp == nil {
8601011
return nil
8611012
}

templates.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ const (
186186
MinColumns = 1
187187
MinFontSize = 1
188188
StreamChunkSize = 1 << 24
189+
StreamingBufSizeDefault = 128 << 10
189190
TotalCellChars = 32767
190191
TotalRows = 1048576
191192
TotalSheetHyperlinks = 65529

0 commit comments

Comments
 (0)