Skip to content

Commit 16173e8

Browse files
authored
Merge pull request #3237 from buildkite/ansi-parser-speedup
ANSI parser speedup
2 parents a5bf173 + fe5f107 commit 16173e8

File tree

4 files changed

+5030
-14
lines changed

4 files changed

+5030
-14
lines changed

process/ansi.go

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ package process
88
// including mention of some of the necessary deviations from the standards
99
// (such as allowing some sequences to terminate with BEL instead of ESC '\').
1010
type ansiParser struct {
11-
state ansiParserState
11+
state *ansiParserState
1212
}
1313

14-
// feed passes more bytes through the parser.
15-
func (m *ansiParser) feed(data ...byte) {
14+
// Write passes more bytes through the parser.
15+
func (m *ansiParser) Write(data []byte) (int, error) {
1616
for _, b := range data {
1717
if m.state != nil {
1818
m.state = m.state[b]
@@ -22,6 +22,7 @@ func (m *ansiParser) feed(data ...byte) {
2222
m.state = initialANSIState
2323
}
2424
}
25+
return len(data), nil
2526
}
2627

2728
// insideCode reports if the data is in the middle of an ANSI sequence.
@@ -30,11 +31,11 @@ func (m *ansiParser) insideCode() bool { return m.state != nil }
3031

3132
// ansiParserState is a possible state of the parser. It's a lookup table from incoming-
3233
// byte to next-state. Most next-states are nil (they exit the escape code).
33-
type ansiParserState map[byte]ansiParserState
34+
type ansiParserState [256]*ansiParserState
3435

3536
var (
3637
// initialANSIState is the state the parser enters once it reads ESC.
37-
initialANSIState = ansiParserState{
38+
initialANSIState = &ansiParserState{
3839
// Note that most bytes immediately following ESC terminate the sequence.
3940
// The following require more processing:
4041
'[': csiParameterState, // CSI
@@ -45,10 +46,10 @@ var (
4546
'_': stTextState, // APC
4647
}
4748
// csiParameterState is the state the parser is in after ESC '['
48-
csiParameterState = ansiParserState{}
49+
csiParameterState = &ansiParserState{}
4950

5051
// stTextState is one of the ST-terminated text states (OSC, DCS, APC, etc)
51-
stTextState = ansiParserState{}
52+
stTextState = &ansiParserState{}
5253
)
5354

5455
// The "looping states" can't be built as composite literals since they refer to
@@ -71,7 +72,7 @@ func init() {
7172
// |
7273
// +--anything else--> (nil)
7374
//
74-
csiIntermediate := ansiParserState{}
75+
csiIntermediate := &ansiParserState{}
7576
for b := byte(0x30); b <= 0x3F; b++ {
7677
csiParameterState[b] = csiParameterState
7778
}
@@ -94,7 +95,7 @@ func init() {
9495
// |
9596
// +--BEL--> (nil)
9697
//
97-
stEscapeState := ansiParserState{}
98+
stEscapeState := &ansiParserState{}
9899
for b := range 256 {
99100
stTextState[byte(b)] = stTextState
100101
stEscapeState[byte(b)] = stTextState

process/ansi_test.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package process
22

3-
import "testing"
3+
import (
4+
"os"
5+
"testing"
6+
)
47

58
func TestANSIParser(t *testing.T) {
69
t.Parallel()
@@ -48,9 +51,21 @@ func TestANSIParser(t *testing.T) {
4851

4952
for _, test := range tests {
5053
var p ansiParser
51-
p.feed([]byte(test.input)...)
54+
p.Write([]byte(test.input))
5255
if got := p.insideCode(); got != test.want {
5356
t.Errorf("after p.feed(%q...): p.insideCode() = %t, want %t", test.input, got, test.want)
5457
}
5558
}
5659
}
60+
61+
func BenchmarkANSIParser(b *testing.B) {
62+
npm, err := os.ReadFile("fixtures/npm.sh.raw")
63+
if err != nil {
64+
b.Fatalf("os.ReadFile(fixtures/npm.sh.raw) error = %v", err)
65+
}
66+
b.ResetTimer()
67+
for range b.N {
68+
var p ansiParser
69+
p.Write(npm)
70+
}
71+
}

0 commit comments

Comments
 (0)