Skip to content

Commit 223bf28

Browse files
committed
parser/pageparser: Don't store the byte slices
On its own this change doesn't do any magic, but it is part of a larger effort to reduce Hugo's memory usage.
1 parent 72b0ccd commit 223bf28

File tree

13 files changed

+385
-198
lines changed

13 files changed

+385
-198
lines changed

.github/workflows/test.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ jobs:
5050
- if: matrix.os == 'windows-latest'
5151
run: |
5252
choco install pandoc
53+
choco install mingw --version 10.2.0 --allow-downgrade
5354
- run: pandoc -v
5455
- if: matrix.os == 'ubuntu-latest'
5556
name: Install dart-sass-embedded Linux

hugolib/page.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,7 @@ func (p *pageState) mapContentForResult(
639639
if fe, ok := err.(herrors.FileError); ok {
640640
return fe
641641
}
642-
return p.parseError(err, iter.Input(), i.Pos)
642+
return p.parseError(err, result.Input(), i.Pos())
643643
}
644644

645645
// the parser is guaranteed to return items in proper order or fail, so …
@@ -656,14 +656,14 @@ Loop:
656656
case it.Type == pageparser.TypeIgnore:
657657
case it.IsFrontMatter():
658658
f := pageparser.FormatFromFrontMatterType(it.Type)
659-
m, err := metadecoders.Default.UnmarshalToMap(it.Val, f)
659+
m, err := metadecoders.Default.UnmarshalToMap(it.Val(result.Input()), f)
660660
if err != nil {
661661
if fe, ok := err.(herrors.FileError); ok {
662662
pos := fe.Position()
663663
// Apply the error to the content file.
664664
pos.Filename = p.File().Filename()
665665
// Offset the starting position of front matter.
666-
offset := iter.LineNumber() - 1
666+
offset := iter.LineNumber(result.Input()) - 1
667667
if f == metadecoders.YAML {
668668
offset -= 1
669669
}
@@ -687,7 +687,7 @@ Loop:
687687

688688
next := iter.Peek()
689689
if !next.IsDone() {
690-
p.source.posMainContent = next.Pos
690+
p.source.posMainContent = next.Pos()
691691
}
692692

693693
if !p.s.shouldBuild(p) {
@@ -699,10 +699,10 @@ Loop:
699699
posBody := -1
700700
f := func(item pageparser.Item) bool {
701701
if posBody == -1 && !item.IsDone() {
702-
posBody = item.Pos
702+
posBody = item.Pos()
703703
}
704704

705-
if item.IsNonWhitespace() {
705+
if item.IsNonWhitespace(result.Input()) {
706706
p.truncated = true
707707

708708
// Done
@@ -712,7 +712,7 @@ Loop:
712712
}
713713
iter.PeekWalk(f)
714714

715-
p.source.posSummaryEnd = it.Pos
715+
p.source.posSummaryEnd = it.Pos()
716716
p.source.posBodyStart = posBody
717717
p.source.hasSummaryDivider = true
718718

@@ -727,13 +727,13 @@ Loop:
727727
// let extractShortcode handle left delim (will do so recursively)
728728
iter.Backup()
729729

730-
currShortcode, err := s.extractShortcode(ordinal, 0, iter)
730+
currShortcode, err := s.extractShortcode(ordinal, 0, result.Input(), iter)
731731
if err != nil {
732732
return fail(err, it)
733733
}
734734

735-
currShortcode.pos = it.Pos
736-
currShortcode.length = iter.Current().Pos - it.Pos
735+
currShortcode.pos = it.Pos()
736+
currShortcode.length = iter.Current().Pos() - it.Pos()
737737
if currShortcode.placeholder == "" {
738738
currShortcode.placeholder = createShortcodePlaceholder("s", currShortcode.ordinal)
739739
}
@@ -754,15 +754,15 @@ Loop:
754754
rn.AddShortcode(currShortcode)
755755

756756
case it.Type == pageparser.TypeEmoji:
757-
if emoji := helpers.Emoji(it.ValStr()); emoji != nil {
757+
if emoji := helpers.Emoji(it.ValStr(result.Input())); emoji != nil {
758758
rn.AddReplacement(emoji, it)
759759
} else {
760760
rn.AddBytes(it)
761761
}
762762
case it.IsEOF():
763763
break Loop
764764
case it.IsError():
765-
err := fail(errors.New(it.ValStr()), it)
765+
err := fail(errors.New(it.ValStr(result.Input())), it)
766766
currShortcode.err = err
767767
return err
768768

hugolib/page__content.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func (p pageContent) contentToRender(parsed pageparser.Result, pm *pageContentMa
4545
for _, it := range pm.items {
4646
switch v := it.(type) {
4747
case pageparser.Item:
48-
c = append(c, source[v.Pos:v.Pos+len(v.Val)]...)
48+
c = append(c, source[v.Pos():v.Pos()+len(v.Val(source))]...)
4949
case pageContentReplacement:
5050
c = append(c, v.val...)
5151
case *shortcode:

hugolib/shortcode.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ func (s *shortcodeHandler) parseError(err error, input []byte, pos int) error {
509509
// pageTokens state:
510510
// - before: positioned just before the shortcode start
511511
// - after: shortcode(s) consumed (plural when they are nested)
512-
func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.Iterator) (*shortcode, error) {
512+
func (s *shortcodeHandler) extractShortcode(ordinal, level int, source []byte, pt *pageparser.Iterator) (*shortcode, error) {
513513
if s == nil {
514514
panic("handler nil")
515515
}
@@ -520,7 +520,7 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I
520520
pt.Backup()
521521
item := pt.Next()
522522
if item.IsIndentation() {
523-
sc.indentation = string(item.Val)
523+
sc.indentation = item.ValStr(source)
524524
}
525525
}
526526

@@ -530,7 +530,7 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I
530530
const errorPrefix = "failed to extract shortcode"
531531

532532
fail := func(err error, i pageparser.Item) error {
533-
return s.parseError(fmt.Errorf("%s: %w", errorPrefix, err), pt.Input(), i.Pos)
533+
return s.parseError(fmt.Errorf("%s: %w", errorPrefix, err), source, i.Pos())
534534
}
535535

536536
Loop:
@@ -550,7 +550,7 @@ Loop:
550550
if cnt > 0 {
551551
// nested shortcode; append it to inner content
552552
pt.Backup()
553-
nested, err := s.extractShortcode(nestedOrdinal, nextLevel, pt)
553+
nested, err := s.extractShortcode(nestedOrdinal, nextLevel, source, pt)
554554
nestedOrdinal++
555555
if nested != nil && nested.name != "" {
556556
s.addName(nested.name)
@@ -589,7 +589,7 @@ Loop:
589589
// return that error, more specific
590590
continue
591591
}
592-
return sc, fail(fmt.Errorf("shortcode %q has no .Inner, yet a closing tag was provided", next.Val), next)
592+
return sc, fail(fmt.Errorf("shortcode %q has no .Inner, yet a closing tag was provided", next.ValStr(source)), next)
593593
}
594594
}
595595
if next.IsRightShortcodeDelim() {
@@ -602,19 +602,19 @@ Loop:
602602

603603
return sc, nil
604604
case currItem.IsText():
605-
sc.inner = append(sc.inner, currItem.ValStr())
605+
sc.inner = append(sc.inner, currItem.ValStr(source))
606606
case currItem.Type == pageparser.TypeEmoji:
607607
// TODO(bep) avoid the duplication of these "text cases", to prevent
608608
// more of #6504 in the future.
609-
val := currItem.ValStr()
609+
val := currItem.ValStr(source)
610610
if emoji := helpers.Emoji(val); emoji != nil {
611611
sc.inner = append(sc.inner, string(emoji))
612612
} else {
613613
sc.inner = append(sc.inner, val)
614614
}
615615
case currItem.IsShortcodeName():
616616

617-
sc.name = currItem.ValStr()
617+
sc.name = currItem.ValStr(source)
618618

619619
// Used to check if the template expects inner content.
620620
templs := s.s.Tmpl().LookupVariants(sc.name)
@@ -625,7 +625,7 @@ Loop:
625625
sc.info = templs[0].(tpl.Info)
626626
sc.templs = templs
627627
case currItem.IsInlineShortcodeName():
628-
sc.name = currItem.ValStr()
628+
sc.name = currItem.ValStr(source)
629629
sc.isInline = true
630630
case currItem.IsShortcodeParam():
631631
if !pt.IsValueNext() {
@@ -634,11 +634,11 @@ Loop:
634634
// named params
635635
if sc.params == nil {
636636
params := make(map[string]any)
637-
params[currItem.ValStr()] = pt.Next().ValTyped()
637+
params[currItem.ValStr(source)] = pt.Next().ValTyped(source)
638638
sc.params = params
639639
} else {
640640
if params, ok := sc.params.(map[string]any); ok {
641-
params[currItem.ValStr()] = pt.Next().ValTyped()
641+
params[currItem.ValStr(source)] = pt.Next().ValTyped(source)
642642
} else {
643643
return sc, errShortCodeIllegalState
644644
}
@@ -647,11 +647,11 @@ Loop:
647647
// positional params
648648
if sc.params == nil {
649649
var params []any
650-
params = append(params, currItem.ValTyped())
650+
params = append(params, currItem.ValTyped(source))
651651
sc.params = params
652652
} else {
653653
if params, ok := sc.params.([]any); ok {
654-
params = append(params, currItem.ValTyped())
654+
params = append(params, currItem.ValTyped(source))
655655
sc.params = params
656656
} else {
657657
return sc, errShortCodeIllegalState

hugolib/shortcode_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ title: "Shortcodes Galore!"
112112
handler := newShortcodeHandler(nil, s)
113113
iter := p.Iterator()
114114

115-
short, err := handler.extractShortcode(0, 0, iter)
115+
short, err := handler.extractShortcode(0, 0, p.Input(), iter)
116116

117117
test.check(c, short, err)
118118
})
@@ -763,7 +763,7 @@ title: "Hugo Rocks!"
763763
)
764764
}
765765

766-
func TestShortcodeTypedParams(t *testing.T) {
766+
func TestShortcodeParams(t *testing.T) {
767767
t.Parallel()
768768
c := qt.New(t)
769769

@@ -778,6 +778,7 @@ title: "Hugo Rocks!"
778778
types positional: {{< hello true false 33 3.14 >}}
779779
types named: {{< hello b1=true b2=false i1=33 f1=3.14 >}}
780780
types string: {{< hello "true" trues "33" "3.14" >}}
781+
escaped quoute: {{< hello "hello \"world\"." >}}
781782
782783
783784
`).WithTemplatesAdded(
@@ -796,6 +797,7 @@ Get: {{ printf "%v (%T)" $b1 $b1 | safeHTML }}
796797
"types positional: - 0: true (bool) - 1: false (bool) - 2: 33 (int) - 3: 3.14 (float64)",
797798
"types named: - b1: true (bool) - b2: false (bool) - f1: 3.14 (float64) - i1: 33 (int) Get: true (bool) ",
798799
"types string: - 0: true (string) - 1: trues (string) - 2: 33 (string) - 3: 3.14 (string) ",
800+
"hello &#34;world&#34;. (string)",
799801
)
800802
}
801803

parser/pageparser/item.go

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,59 @@ import (
2222
"github.com/yuin/goldmark/util"
2323
)
2424

25+
type lowHigh struct {
26+
Low int
27+
High int
28+
}
29+
2530
type Item struct {
26-
Type ItemType
27-
Pos int
28-
Val []byte
31+
Type ItemType
32+
Err error
33+
34+
// The common case is a single segment.
35+
low int
36+
high int
37+
38+
// This is the uncommon case.
39+
segments []lowHigh
40+
41+
// Used for validation.
42+
firstByte byte
43+
2944
isString bool
3045
}
3146

3247
type Items []Item
3348

34-
func (i Item) ValStr() string {
35-
return string(i.Val)
49+
func (i Item) Pos() int {
50+
if len(i.segments) > 0 {
51+
return i.segments[0].Low
52+
}
53+
return i.low
54+
}
55+
56+
func (i Item) Val(source []byte) []byte {
57+
if len(i.segments) == 0 {
58+
return source[i.low:i.high]
59+
}
60+
61+
if len(i.segments) == 1 {
62+
return source[i.segments[0].Low:i.segments[0].High]
63+
}
64+
65+
var b bytes.Buffer
66+
for _, s := range i.segments {
67+
b.Write(source[s.Low:s.High])
68+
}
69+
return b.Bytes()
70+
}
71+
72+
func (i Item) ValStr(source []byte) string {
73+
return string(i.Val(source))
3674
}
3775

38-
func (i Item) ValTyped() any {
39-
str := i.ValStr()
76+
func (i Item) ValTyped(source []byte) any {
77+
str := i.ValStr(source)
4078
if i.isString {
4179
// A quoted value that is a string even if it looks like a number etc.
4280
return str
@@ -73,8 +111,8 @@ func (i Item) IsIndentation() bool {
73111
return i.Type == tIndentation
74112
}
75113

76-
func (i Item) IsNonWhitespace() bool {
77-
return len(bytes.TrimSpace(i.Val)) > 0
114+
func (i Item) IsNonWhitespace(source []byte) bool {
115+
return len(bytes.TrimSpace(i.Val(source))) > 0
78116
}
79117

80118
func (i Item) IsShortcodeName() bool {
@@ -125,20 +163,21 @@ func (i Item) IsError() bool {
125163
return i.Type == tError
126164
}
127165

128-
func (i Item) String() string {
166+
func (i Item) ToString(source []byte) string {
167+
val := i.Val(source)
129168
switch {
130169
case i.Type == tEOF:
131170
return "EOF"
132171
case i.Type == tError:
133-
return string(i.Val)
172+
return string(val)
134173
case i.Type == tIndentation:
135-
return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(i.Val))
174+
return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(val))
136175
case i.Type > tKeywordMarker:
137-
return fmt.Sprintf("<%s>", i.Val)
138-
case len(i.Val) > 50:
139-
return fmt.Sprintf("%v:%.20q...", i.Type, i.Val)
176+
return fmt.Sprintf("<%s>", val)
177+
case len(val) > 50:
178+
return fmt.Sprintf("%v:%.20q...", i.Type, val)
140179
}
141-
return fmt.Sprintf("%v:[%s]", i.Type, i.Val)
180+
return fmt.Sprintf("%v:[%s]", i.Type, val)
142181
}
143182

144183
type ItemType int

parser/pageparser/item_test.go

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,22 @@ import (
2222
func TestItemValTyped(t *testing.T) {
2323
c := qt.New(t)
2424

25-
c.Assert(Item{Val: []byte("3.14")}.ValTyped(), qt.Equals, float64(3.14))
26-
c.Assert(Item{Val: []byte(".14")}.ValTyped(), qt.Equals, float64(.14))
27-
c.Assert(Item{Val: []byte("314")}.ValTyped(), qt.Equals, 314)
28-
c.Assert(Item{Val: []byte("314x")}.ValTyped(), qt.Equals, "314x")
29-
c.Assert(Item{Val: []byte("314 ")}.ValTyped(), qt.Equals, "314 ")
30-
c.Assert(Item{Val: []byte("314"), isString: true}.ValTyped(), qt.Equals, "314")
31-
c.Assert(Item{Val: []byte("true")}.ValTyped(), qt.Equals, true)
32-
c.Assert(Item{Val: []byte("false")}.ValTyped(), qt.Equals, false)
33-
c.Assert(Item{Val: []byte("trues")}.ValTyped(), qt.Equals, "trues")
25+
source := []byte("3.14")
26+
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, float64(3.14))
27+
source = []byte(".14")
28+
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, float64(0.14))
29+
source = []byte("314")
30+
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, 314)
31+
source = []byte("314")
32+
c.Assert(Item{low: 0, high: len(source), isString: true}.ValTyped(source), qt.Equals, "314")
33+
source = []byte("314x")
34+
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "314x")
35+
source = []byte("314 ")
36+
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "314 ")
37+
source = []byte("true")
38+
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, true)
39+
source = []byte("false")
40+
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, false)
41+
source = []byte("trued")
42+
3443
}

0 commit comments

Comments
 (0)