Skip to content
This repository was archived by the owner on Aug 13, 2025. It is now read-only.

Commit 3476977

Browse files
authored
Merge pull request #828 from tealeg/sync-pools-for-ssts
Use sync pools for shared string parser
2 parents faa27f6 + 775f9cf commit 3476977

File tree

5 files changed

+184
-96
lines changed

5 files changed

+184
-96
lines changed

file.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ func autoFilterDefinedName(sheet *Sheet, sheetIndex int) (*xlsxDefinedName, erro
335335
// representing the file in terms of the structure of an XLSX file.
336336
func (f *File) MakeStreamParts() (map[string]string, error) {
337337
var parts map[string]string
338-
var refTable *RefTable = NewSharedStringRefTable(10000) // 10000 is arbitrary
338+
var refTable *RefTable = NewSharedStringRefTable(DEFAULT_REFTABLE_SIZE)
339339
refTable.isWrite = true
340340
var workbookRels WorkBookRels = make(WorkBookRels)
341341
var err error
@@ -465,7 +465,7 @@ func (f *File) MakeStreamParts() (map[string]string, error) {
465465
// MarshallParts constructs a map of file name to XML content representing the file
466466
// in terms of the structure of an XLSX file.
467467
func (f *File) MarshallParts(zipWriter *zip.Writer) error {
468-
var refTable *RefTable = NewSharedStringRefTable(10000) // 10000 is arbitrary
468+
var refTable *RefTable = NewSharedStringRefTable(DEFAULT_REFTABLE_SIZE)
469469
refTable.isWrite = true
470470
var workbookRels WorkBookRels = make(WorkBookRels)
471471
var err error
@@ -650,9 +650,10 @@ func (f *File) MarshallParts(zipWriter *zip.Writer) error {
650650
// Here, value would be set to the raw value of the cell A1 in the
651651
// first sheet in the XLSX file.
652652
func (f *File) ToSlice() (output [][][]string, err error) {
653-
output = [][][]string{}
653+
sheetCount := len(f.Sheets)
654+
output = make([][][]string, 0, sheetCount)
654655
for _, sheet := range f.Sheets {
655-
s := [][]string{}
656+
s := make([][]string, 0, sheet.MaxRow)
656657
err := sheet.ForEachRow(func(row *Row) error {
657658
r := []string{}
658659
err := row.ForEachCell(func(cell *Cell) error {

lib.go

Lines changed: 114 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"runtime/debug"
1515
"strconv"
1616
"strings"
17+
"sync"
1718
)
1819

1920
const (
@@ -22,6 +23,26 @@ const (
2223
externalSheetBangChar = "!"
2324
)
2425

26+
var (
27+
tokPool = sync.Pool{
28+
New: func() interface{} {
29+
return &xml.StartElement{}
30+
},
31+
}
32+
33+
xlsxSIPool = sync.Pool{
34+
New: func() interface{} {
35+
return &xlsxSI{}
36+
},
37+
}
38+
39+
xmlAttrPool = sync.Pool{
40+
New: func() interface{} {
41+
return &xml.Attr{}
42+
},
43+
}
44+
)
45+
2546
// XLSXReaderError is the standard error type for otherwise undefined
2647
// errors in the XLSX reading process.
2748
type XLSXReaderError struct {
@@ -845,15 +866,104 @@ func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]strin
845866
return sheetsByName, sheets, err
846867
}
847868

869+
func readSharedStrings(rc io.Reader) (*RefTable, error) {
870+
var err error
871+
var decoder *xml.Decoder
872+
var reftable *RefTable
873+
var tok xml.Token
874+
var count int
875+
var countS string
876+
var ok bool
877+
var si *xlsxSI
878+
var attr *xml.Attr
879+
880+
wrap := func(err error) (*RefTable, error) {
881+
return nil, fmt.Errorf("readSharedStrings: %w", err)
882+
}
883+
884+
decoder = xml.NewDecoder(rc)
885+
886+
for {
887+
tok = tokPool.Get().(xml.Token)
888+
tok, err = decoder.Token()
889+
if tok == nil {
890+
break
891+
} else if err == io.EOF {
892+
break
893+
}
894+
if err != nil {
895+
return wrap(err)
896+
}
897+
switch ty := tok.(type) {
898+
case xml.StartElement:
899+
switch ty.Name.Local {
900+
case "sst":
901+
attr = xmlAttrPool.Get().(*xml.Attr)
902+
ok = false
903+
for _, (*attr) = range ty.Attr {
904+
if attr.Name.Local == "count" {
905+
countS = attr.Value
906+
ok = true
907+
break
908+
}
909+
}
910+
xmlAttrPool.Put(attr)
911+
if !ok {
912+
// No hints on the size, so we'll just start with
913+
// a decent number of entries to avoid small
914+
// allocs.
915+
reftable = NewSharedStringRefTable(DEFAULT_REFTABLE_SIZE)
916+
reftable.isWrite = false //Todo, do we actually use this?
917+
} else {
918+
count, err = strconv.Atoi(countS)
919+
if err != nil {
920+
return wrap(err)
921+
}
922+
reftable = NewSharedStringRefTable(count)
923+
reftable.isWrite = false //Todo, do we actually use this?
924+
}
925+
case "si":
926+
if reftable == nil {
927+
return wrap(fmt.Errorf("si encountered before reftable created"))
928+
}
929+
si = xlsxSIPool.Get().(*xlsxSI)
930+
if err = decoder.DecodeElement(si, &ty); err != nil {
931+
xlsxSIPool.Put(si)
932+
return wrap(err)
933+
}
934+
if len(si.R) > 0 {
935+
reftable.AddRichText(xmlToRichText(si.R))
936+
} else {
937+
reftable.AddString(si.T.getText())
938+
}
939+
// clean up before returning to the pool, without
940+
// these lines you'll see weird effects when reading
941+
// another set of shared strings
942+
si.R = nil
943+
si.T = nil
944+
xlsxSIPool.Put(si)
945+
default:
946+
// Do nothing
947+
}
948+
default:
949+
// Do nothing
950+
}
951+
tokPool.Put(tok)
952+
}
953+
954+
if reftable == nil {
955+
panic("Unitialised reftable")
956+
}
957+
return reftable, nil
958+
959+
}
960+
848961
// readSharedStringsFromZipFile() is an internal helper function to
849962
// extract a reference table from the sharedStrings.xml file within
850963
// the XLSX zip file.
851964
func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
852-
var sst *xlsxSST
853965
var err error
854966
var rc io.ReadCloser
855-
var decoder *xml.Decoder
856-
var reftable *RefTable
857967

858968
wrap := func(err error) (*RefTable, error) {
859969
return nil, fmt.Errorf("readSharedStringsFromZipFile: %w", err)
@@ -870,15 +980,7 @@ func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
870980
return wrap(err)
871981
}
872982
defer rc.Close()
873-
874-
sst = new(xlsxSST)
875-
decoder = xml.NewDecoder(rc)
876-
err = decoder.Decode(sst)
877-
if err != nil {
878-
return wrap(err)
879-
}
880-
reftable = MakeSharedStringRefTable(sst)
881-
return reftable, nil
983+
return readSharedStrings(rc)
882984
}
883985

884986
// readStylesFromZipFile() is an internal helper function to

lib_test.go

Lines changed: 29 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ func TestLib(t *testing.T) {
284284
// })
285285

286286
csRunC(c, "ReadRowsFromSheet", func(c *qt.C, constructor CellStoreConstructor) {
287+
var err error
287288
var sharedstringsXML = bytes.NewBufferString(`
288289
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
289290
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="4" uniqueCount="4">
@@ -337,14 +338,12 @@ func TestLib(t *testing.T) {
337338
footer="0.3"/>
338339
</worksheet>`)
339340
worksheet := new(xlsxWorksheet)
340-
err := xml.NewDecoder(sheetxml).Decode(worksheet)
341-
c.Assert(err, qt.IsNil)
342-
sst := new(xlsxSST)
343-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
341+
err = xml.NewDecoder(sheetxml).Decode(worksheet)
344342
c.Assert(err, qt.IsNil)
345343
file := new(File)
346344
file.cellStoreConstructor = constructor
347-
file.referenceTable = MakeSharedStringRefTable(sst)
345+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
346+
c.Assert(err, qt.IsNil)
348347
sheet, err := NewSheet("test")
349348
c.Assert(err, qt.IsNil)
350349
lt := make(hyperlinkTable)
@@ -433,12 +432,10 @@ func TestLib(t *testing.T) {
433432
worksheet := new(xlsxWorksheet)
434433
err := xml.NewDecoder(sheetxml).Decode(worksheet)
435434
c.Assert(err, qt.IsNil)
436-
sst := new(xlsxSST)
437-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
438-
c.Assert(err, qt.IsNil)
439435
file := new(File)
440436
file.cellStoreConstructor = constructor
441-
file.referenceTable = MakeSharedStringRefTable(sst)
437+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
438+
c.Assert(err, qt.IsNil)
442439

443440
sheet, err := NewSheetWithCellStore("test", constructor)
444441
c.Assert(err, qt.IsNil)
@@ -486,13 +483,11 @@ func TestLib(t *testing.T) {
486483
worksheet := new(xlsxWorksheet)
487484
err := xml.NewDecoder(sheetxml).Decode(worksheet)
488485
c.Assert(err, qt.IsNil)
489-
sst := new(xlsxSST)
490-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
491-
c.Assert(err, qt.IsNil)
492486

493487
file := new(File)
494488
file.cellStoreConstructor = constructor
495-
file.referenceTable = MakeSharedStringRefTable(sst)
489+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
490+
c.Assert(err, qt.IsNil)
496491
sheet, err := NewSheetWithCellStore("test", constructor)
497492
c.Assert(err, qt.IsNil)
498493
lt := make(hyperlinkTable)
@@ -568,13 +563,11 @@ func TestLib(t *testing.T) {
568563
worksheet := new(xlsxWorksheet)
569564
err := xml.NewDecoder(sheetxml).Decode(worksheet)
570565
c.Assert(err, qt.IsNil)
571-
sst := new(xlsxSST)
572-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
573-
c.Assert(err, qt.IsNil)
574566

575567
file := new(File)
576568
file.cellStoreConstructor = constructor
577-
file.referenceTable = MakeSharedStringRefTable(sst)
569+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
570+
c.Assert(err, qt.IsNil)
578571
sheet, err := NewSheetWithCellStore("test", constructor)
579572
c.Assert(err, qt.IsNil)
580573
lt := make(hyperlinkTable)
@@ -717,12 +710,11 @@ func TestLib(t *testing.T) {
717710
worksheet := new(xlsxWorksheet)
718711
err := xml.NewDecoder(sheetxml).Decode(worksheet)
719712
c.Assert(err, qt.IsNil)
720-
sst := new(xlsxSST)
721-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
722-
c.Assert(err, qt.IsNil)
713+
723714
file := new(File)
724715
file.cellStoreConstructor = constructor
725-
file.referenceTable = MakeSharedStringRefTable(sst)
716+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
717+
c.Assert(err, qt.IsNil)
726718
sheet, err := NewSheetWithCellStore("test", constructor)
727719
c.Assert(err, qt.IsNil)
728720
lt := make(hyperlinkTable)
@@ -764,13 +756,10 @@ func TestLib(t *testing.T) {
764756
err := xml.NewDecoder(sheetxml).Decode(worksheet)
765757
c.Assert(err, qt.IsNil)
766758

767-
sst := new(xlsxSST)
768-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
769-
c.Assert(err, qt.IsNil)
770-
771759
file := new(File)
772760
file.cellStoreConstructor = constructor
773-
file.referenceTable = MakeSharedStringRefTable(sst)
761+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
762+
c.Assert(err, qt.IsNil)
774763
sheet, err := NewSheetWithCellStore("test", constructor)
775764
c.Assert(err, qt.IsNil)
776765
lt := make(hyperlinkTable)
@@ -882,12 +871,11 @@ func TestLib(t *testing.T) {
882871
worksheet := new(xlsxWorksheet)
883872
err := xml.NewDecoder(sheetxml).Decode(worksheet)
884873
c.Assert(err, qt.IsNil)
885-
sst := new(xlsxSST)
886-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
887-
c.Assert(err, qt.IsNil)
888874
file := new(File)
889875
file.cellStoreConstructor = constructor
890-
file.referenceTable = MakeSharedStringRefTable(sst)
876+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
877+
c.Assert(err, qt.IsNil)
878+
891879
sheet, err := NewSheetWithCellStore("test", constructor)
892880
c.Assert(err, qt.IsNil)
893881

@@ -964,12 +952,11 @@ func TestLib(t *testing.T) {
964952
worksheet := new(xlsxWorksheet)
965953
err := xml.NewDecoder(sheetxml).Decode(worksheet)
966954
c.Assert(err, qt.IsNil)
967-
sst := new(xlsxSST)
968-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
969-
c.Assert(err, qt.IsNil)
970955
file := new(File)
971956
file.cellStoreConstructor = constructor
972-
file.referenceTable = MakeSharedStringRefTable(sst)
957+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
958+
c.Assert(err, qt.IsNil)
959+
973960
sheet, err := NewSheetWithCellStore("test", constructor)
974961
c.Assert(err, qt.IsNil)
975962
lt := make(hyperlinkTable)
@@ -1043,12 +1030,11 @@ func TestLib(t *testing.T) {
10431030
worksheet := new(xlsxWorksheet)
10441031
err := xml.NewDecoder(sheetxml).Decode(worksheet)
10451032
c.Assert(err, qt.IsNil)
1046-
sst := new(xlsxSST)
1047-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
1048-
c.Assert(err, qt.IsNil)
10491033
file := new(File)
10501034
file.cellStoreConstructor = constructor
1051-
file.referenceTable = MakeSharedStringRefTable(sst)
1035+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
1036+
c.Assert(err, qt.IsNil)
1037+
10521038
sheet, err := NewSheetWithCellStore("test", constructor)
10531039
c.Assert(err, qt.IsNil)
10541040
lt := make(hyperlinkTable)
@@ -1334,13 +1320,10 @@ func TestLib(t *testing.T) {
13341320
err := xml.NewDecoder(sheetXML).Decode(worksheet)
13351321
c.Assert(err, qt.IsNil)
13361322

1337-
sst := new(xlsxSST)
1338-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
1339-
c.Assert(err, qt.IsNil)
1340-
13411323
file := new(File)
13421324
file.cellStoreConstructor = constructor
1343-
file.referenceTable = MakeSharedStringRefTable(sst)
1325+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
1326+
c.Assert(err, qt.IsNil)
13441327

13451328
sheet, err := NewSheetWithCellStore("test", constructor)
13461329
c.Assert(err, qt.IsNil)
@@ -1433,12 +1416,11 @@ func TestReadRowsFromSheet(t *testing.T) {
14331416
worksheet := new(xlsxWorksheet)
14341417
err := xml.NewDecoder(sheetxml).Decode(worksheet)
14351418
c.Assert(err, qt.IsNil)
1436-
sst := new(xlsxSST)
1437-
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
1438-
c.Assert(err, qt.IsNil)
14391419
file := new(File)
14401420
file.cellStoreConstructor = constructor
1441-
file.referenceTable = MakeSharedStringRefTable(sst)
1421+
file.referenceTable, err = readSharedStrings(sharedstringsXML)
1422+
c.Assert(err, qt.IsNil)
1423+
14421424
worksheet.mapMergeCells()
14431425
sheet, err := NewSheetWithCellStore("test", constructor)
14441426
c.Assert(err, qt.IsNil)

0 commit comments

Comments
 (0)