Skip to content

Commit 3966ecb

Browse files
committed
Fix lexer backtracking at EOI.
1 parent ce72007 commit 3966ecb

15 files changed

Lines changed: 267 additions & 175 deletions

File tree

tm-go/gen/templates.go

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -265,6 +265,17 @@ restart:
265265
ch = int(tmRuneClass[l.ch])
266266
} else if l.ch < 0 {
267267
state = int(tmLexerAction[state*tmNumClasses])
268+
{{- if .Lexer.Tables.Backtrack}}
269+
if state > tmFirstRule && state < 0 {
270+
state = (-1 - state) * 2
271+
backup{{if .Lexer.RuleToken}}Rule{{else}}Token{{end}} = tmBacktracking[state]
272+
backupOffset = l.offset
273+
{{- if .Lexer.ClassActions}}
274+
backupHash = hash
275+
{{- end}}
276+
state = tmBacktracking[state+1]
277+
}
278+
{{- end}}
268279
continue
269280
} else {
270281
{{- if gt .Lexer.Tables.LastMapEntry.Start 2048}}

tm-go/lex/regexp.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -534,7 +534,7 @@ func (p *parser) parseEscape(fold bool) charset {
534534
case 's', 'S':
535535
negated := p.ch == 'S'
536536
p.next()
537-
ret := charset(append(p.set[:0], '\t', '\t', '\n', '\n', '\f', '\f', '\r', '\r', ' ', ' '))
537+
ret := charset(append(p.set[:0], '\t', '\t', '\n', '\n', '\v', '\v', '\f', '\f', '\r', '\r', ' ', ' '))
538538
if negated {
539539
ret.invert()
540540
}

tm-go/lex/regexp_test.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -74,10 +74,10 @@ var parseTests = []struct {
7474
{`\w`, `cc{0-9A-Z_a-z}`},
7575
{`[^\W]`, `cc{0-9A-Z_a-z}`},
7676
{`[\W]`, "cc{\\x00-\\/\\:-\\@\\[-\\^\\`\\{-\\U0010ffff}"},
77-
{`[\s]`, `cc{\t-\n\x0c-\r }`},
78-
{`[^\s]`, `cc{\x00-\x08\x0b\x0e-\x1f\!-\U0010ffff}`},
79-
{`\S`, `cc{\x00-\x08\x0b\x0e-\x1f\!-\U0010ffff}`},
80-
{`[\S]`, `cc{\x00-\x08\x0b\x0e-\x1f\!-\U0010ffff}`},
77+
{`[\s]`, `cc{\t-\r }`},
78+
{`[^\s]`, `cc{\x00-\x08\x0e-\x1f\!-\U0010ffff}`},
79+
{`\S`, `cc{\x00-\x08\x0e-\x1f\!-\U0010ffff}`},
80+
{`[\S]`, `cc{\x00-\x08\x0e-\x1f\!-\U0010ffff}`},
8181
{`+\+`, `cat{str{+}cc{\+}}`},
8282

8383
// Unicode.
@@ -192,7 +192,7 @@ var stringTests = []struct {
192192
// Escapes.
193193
{`\d\D`, `[0-9][\x00-\/\:-\U0010ffff]`},
194194
{`\w+`, `[0-9A-Z_a-z]+`},
195-
{`\S`, `[\x00-\x08\x0b\x0e-\x1f\!-\U0010ffff]`},
195+
{`\S`, `[\x00-\x08\x0e-\x1f\!-\U0010ffff]`},
196196

197197
// Parentheses.
198198
{`a+|(b|cd|)`, `a+|(b|cd|)`},

tm-go/parsers/json/lexer.go

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,13 @@ restart:
6464
ch = int(tmRuneClass[l.ch])
6565
} else if l.ch < 0 {
6666
state = int(tmLexerAction[state*tmNumClasses])
67+
if state > tmFirstRule && state < 0 {
68+
state = (-1 - state) * 2
69+
backupToken = tmBacktracking[state]
70+
backupOffset = l.offset
71+
backupHash = hash
72+
state = tmBacktracking[state+1]
73+
}
6774
continue
6875
} else {
6976
ch = 1

tm-go/parsers/test/lexer.go

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,13 @@ restart:
6767
ch = int(tmRuneClass[l.ch])
6868
} else if l.ch < 0 {
6969
state = int(tmLexerAction[state*tmNumClasses])
70+
if state > tmFirstRule && state < 0 {
71+
state = (-1 - state) * 2
72+
backupRule = tmBacktracking[state]
73+
backupOffset = l.offset
74+
backupHash = hash
75+
state = tmBacktracking[state+1]
76+
}
7077
continue
7178
} else {
7279
ch = mapRune(l.ch)
@@ -140,12 +147,12 @@ recovered:
140147
break
141148
}
142149
}
143-
case 28:
150+
case 30:
144151
hh := hash & 7
145152
switch hh {
146153
case 4:
147154
if hash == 0x2a762c && "Zfoo" == l.source[l.tokenOffset:l.offset] {
148-
rule = 30
155+
rule = 32
149156
break
150157
}
151158
}
@@ -171,24 +178,24 @@ recovered:
171178
{
172179
l.value = mustParseInt(l.Text())
173180
}
174-
case 32: // MultiLineComment: /\/\*/
181+
case 34: // MultiLineComment: /\/\*/
175182
{
176183
l.State = StateInMultiLine
177184
commentOffset = l.tokenOffset
178185
commentDepth = 0
179186
space = true
180187
}
181-
case 33: // invalid_token: /{eoi}/
188+
case 35: // invalid_token: /{eoi}/
182189
{
183190
l.tokenOffset = commentOffset
184191
l.State = StateInitial
185192
}
186-
case 34: // MultiLineComment: /\/\*/
193+
case 36: // MultiLineComment: /\/\*/
187194
{
188195
commentDepth++
189196
space = true
190197
}
191-
case 35: // MultiLineComment: /\*\//
198+
case 37: // MultiLineComment: /\*\//
192199
{
193200
if commentDepth == 0 {
194201
space = false
@@ -199,7 +206,7 @@ recovered:
199206
space = true
200207
commentDepth--
201208
}
202-
case 36: // WhiteSpace: /[^\/*]+|[*\/]/
209+
case 38: // WhiteSpace: /[^\/*]+|[*\/]/
203210
space = true
204211
{
205212
space = true

tm-go/parsers/test/lexer_tables.go

Lines changed: 127 additions & 101 deletions
Large diffs are not rendered by default.

tm-go/parsers/test/lexer_test.go

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,16 @@ var lexerTests = []struct {
5151
{test.RPAREN, []string{"«)»"}},
5252
{test.LBRACK, []string{"«[»"}},
5353
{test.RBRACK, []string{"«]»"}},
54-
{test.DOT, []string{"«.»"}},
54+
{test.DOT, []string{
55+
"«.»",
56+
"«.»«.»",
57+
}},
58+
{test.MULTILINE, []string{
59+
"% \n «%q\n% q»\n%f",
60+
"«%q\n%q» !",
61+
"«%q\n% q»",
62+
}},
63+
{test.DOTDOTDOT, []string{"«...»"}},
5564
{test.COMMA, []string{"«,»"}},
5665
{test.COLON, []string{"«:»"}},
5766
{test.PLUS, []string{"«+»"}},

tm-go/parsers/test/parser.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,12 +54,12 @@ const (
5454
)
5555

5656
func (p *Parser) ParseTest(ctx context.Context, lexer *Lexer) error {
57-
_, err := p.parse(ctx, 0, 98, lexer)
57+
_, err := p.parse(ctx, 0, 100, lexer)
5858
return err
5959
}
6060

6161
func (p *Parser) ParseDecl1(ctx context.Context, lexer *Lexer) (int, error) {
62-
v, err := p.parse(ctx, 1, 99, lexer)
62+
v, err := p.parse(ctx, 1, 101, lexer)
6363
val, _ := v.(int)
6464
return val, err
6565
}

tm-go/parsers/test/parser_tables.go

Lines changed: 39 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -37,62 +37,63 @@ func symbolName(sym int32) string {
3737
}
3838

3939
var tmAction = []int32{
40-
-1, -1, -3, 11, -1, -1, 57, -1, -1, -1, -27, 1, 3, 4, 58, -1, 16, 51, 52, -1,
41-
-1, -1, 10, -1, -1, 0, 12, -1, -1, 54, -1, -47, -1, -1, 62, -1, -1, 8, -1,
40+
-1, -1, -3, 11, -1, -1, 59, -1, -1, -1, -27, 1, 3, 4, 60, -1, 16, 53, 54, -1,
41+
-1, -1, 10, -1, -1, 0, 12, -1, -1, 56, -1, -47, -1, -1, 64, -1, -1, 8, -1,
4242
-1, 9, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 14, 34, 35,
43-
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 15, 56, -1, -1,
44-
18, 17, -1, -1, -57, 6, -1, 7, 55, -81, -89, 61, -95, -1, 5, -1, 60, -1, 19,
45-
-1, -1, -2, -2,
43+
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 15, 58,
44+
-1, -1, 18, 17, -1, -1, -57, 6, -1, 7, 57, -81, -89, 63, -95, -1, 5, -1, 62,
45+
-1, 19, -1, -1, -2, -2,
4646
}
4747

4848
var tmLalr = []int32{
4949
18, -1, 0, 13, 5, 13, 6, 13, 7, 13, 8, 13, 9, 13, 10, 13, 12, 13, 14, 13, 15,
5050
13, -1, -2, 5, -1, 6, -1, 7, -1, 8, -1, 9, -1, 10, -1, 12, -1, 14, -1, 0, 2,
51-
-1, -2, 20, -1, 17, 64, 25, 64, 11, 65, -1, -2, 13, -1, 0, 59, 5, 59, 6, 59,
52-
7, 59, 8, 59, 9, 59, 10, 59, 12, 59, 14, 59, 15, 59, -1, -2, 17, 64, 25, 64,
53-
11, 65, -1, -2, 25, -1, 17, 53, -1, -2, 25, -1, 17, 63, -1, -2,
51+
-1, -2, 20, -1, 17, 66, 26, 66, 11, 67, -1, -2, 13, -1, 0, 61, 5, 61, 6, 61,
52+
7, 61, 8, 61, 9, 61, 10, 61, 12, 61, 14, 61, 15, 61, -1, -2, 17, 66, 26, 66,
53+
11, 67, -1, -2, 26, -1, 17, 55, -1, -2, 26, -1, 17, 65, -1, -2,
5454
}
5555

5656
var tmGoto = []int32{
5757
0, 4, 6, 8, 10, 16, 46, 64, 82, 102, 124, 142, 146, 168, 172, 192, 206, 216,
58-
230, 234, 238, 242, 244, 246, 252, 254, 264, 266, 268, 270, 272, 274, 276,
59-
278, 286, 288, 304, 306, 308, 310, 312, 314, 332, 352, 372, 380, 390, 400,
58+
230, 234, 238, 242, 244, 246, 248, 254, 256, 266, 268, 270, 272, 274, 276,
59+
278, 280, 282, 290, 292, 308, 310, 312, 314, 316, 318, 336, 356, 376, 384,
60+
394, 404,
6061
}
6162

6263
var tmFromTo = []int8{
63-
96, 98, 97, 99, 27, 41, 27, 42, 27, 43, 19, 29, 27, 44, 75, 85, 0, 2, 4, 16,
64-
9, 2, 10, 2, 20, 31, 23, 2, 24, 2, 27, 45, 38, 2, 39, 2, 76, 86, 79, 86, 80,
65-
86, 83, 2, 92, 86, 0, 3, 9, 3, 10, 3, 23, 3, 24, 3, 27, 46, 38, 3, 39, 3, 83,
66-
3, 0, 4, 9, 4, 10, 4, 23, 4, 24, 4, 27, 47, 38, 4, 39, 4, 83, 4, 0, 5, 1, 5,
67-
9, 5, 10, 5, 23, 5, 24, 5, 27, 48, 38, 5, 39, 5, 83, 5, 0, 6, 9, 6, 10, 6,
68-
23, 6, 24, 6, 27, 49, 36, 6, 38, 6, 39, 6, 83, 6, 90, 6, 0, 7, 9, 7, 10, 7,
69-
23, 7, 24, 7, 27, 50, 38, 7, 39, 7, 83, 7, 27, 51, 35, 80, 0, 8, 9, 8, 10, 8,
70-
23, 8, 24, 8, 27, 52, 36, 8, 38, 8, 39, 8, 83, 8, 90, 8, 27, 53, 81, 90, 0,
71-
9, 4, 17, 9, 9, 10, 9, 23, 9, 24, 9, 27, 54, 38, 9, 39, 9, 83, 9, 9, 22, 23,
72-
37, 24, 40, 27, 55, 38, 82, 39, 84, 83, 91, 4, 18, 5, 19, 7, 20, 8, 21, 27,
73-
56, 21, 36, 27, 57, 28, 73, 30, 74, 32, 77, 33, 78, 94, 95, 2, 15, 27, 58,
74-
15, 26, 27, 59, 30, 75, 31, 76, 27, 60, 27, 61, 9, 23, 23, 38, 27, 62, 27,
75-
63, 27, 64, 33, 79, 87, 92, 89, 79, 94, 79, 27, 65, 27, 66, 27, 67, 27, 68,
76-
27, 69, 27, 70, 27, 71, 0, 10, 9, 24, 23, 39, 38, 83, 0, 96, 0, 11, 9, 11,
77-
10, 25, 23, 11, 24, 25, 38, 11, 39, 25, 83, 25, 27, 72, 17, 27, 18, 28, 20,
78-
32, 19, 30, 0, 12, 1, 97, 9, 12, 10, 12, 23, 12, 24, 12, 38, 12, 39, 12, 83,
79-
12, 0, 13, 9, 13, 10, 13, 23, 13, 24, 13, 36, 81, 38, 13, 39, 13, 83, 13, 90,
80-
93, 0, 14, 9, 14, 10, 14, 23, 14, 24, 14, 36, 14, 38, 14, 39, 14, 83, 14, 90,
81-
14, 20, 33, 76, 87, 80, 89, 92, 94, 20, 34, 76, 34, 79, 88, 80, 34, 92, 34,
82-
20, 35, 76, 35, 79, 35, 80, 35, 92, 35,
64+
98, 100, 99, 101, 27, 41, 27, 42, 27, 43, 19, 29, 27, 44, 77, 87, 0, 2, 4,
65+
16, 9, 2, 10, 2, 20, 31, 23, 2, 24, 2, 27, 45, 38, 2, 39, 2, 78, 88, 81, 88,
66+
82, 88, 85, 2, 94, 88, 0, 3, 9, 3, 10, 3, 23, 3, 24, 3, 27, 46, 38, 3, 39, 3,
67+
85, 3, 0, 4, 9, 4, 10, 4, 23, 4, 24, 4, 27, 47, 38, 4, 39, 4, 85, 4, 0, 5, 1,
68+
5, 9, 5, 10, 5, 23, 5, 24, 5, 27, 48, 38, 5, 39, 5, 85, 5, 0, 6, 9, 6, 10, 6,
69+
23, 6, 24, 6, 27, 49, 36, 6, 38, 6, 39, 6, 85, 6, 92, 6, 0, 7, 9, 7, 10, 7,
70+
23, 7, 24, 7, 27, 50, 38, 7, 39, 7, 85, 7, 27, 51, 35, 82, 0, 8, 9, 8, 10, 8,
71+
23, 8, 24, 8, 27, 52, 36, 8, 38, 8, 39, 8, 85, 8, 92, 8, 27, 53, 83, 92, 0,
72+
9, 4, 17, 9, 9, 10, 9, 23, 9, 24, 9, 27, 54, 38, 9, 39, 9, 85, 9, 9, 22, 23,
73+
37, 24, 40, 27, 55, 38, 84, 39, 86, 85, 93, 4, 18, 5, 19, 7, 20, 8, 21, 27,
74+
56, 21, 36, 27, 57, 28, 75, 30, 76, 32, 79, 33, 80, 96, 97, 2, 15, 27, 58,
75+
15, 26, 27, 59, 30, 77, 31, 78, 27, 60, 27, 61, 27, 62, 9, 23, 23, 38, 27,
76+
63, 27, 64, 27, 65, 33, 81, 89, 94, 91, 81, 96, 81, 27, 66, 27, 67, 27, 68,
77+
27, 69, 27, 70, 27, 71, 27, 72, 27, 73, 0, 10, 9, 24, 23, 39, 38, 85, 0, 98,
78+
0, 11, 9, 11, 10, 25, 23, 11, 24, 25, 38, 11, 39, 25, 85, 25, 27, 74, 17, 27,
79+
18, 28, 20, 32, 19, 30, 0, 12, 1, 99, 9, 12, 10, 12, 23, 12, 24, 12, 38, 12,
80+
39, 12, 85, 12, 0, 13, 9, 13, 10, 13, 23, 13, 24, 13, 36, 83, 38, 13, 39, 13,
81+
85, 13, 92, 95, 0, 14, 9, 14, 10, 14, 23, 14, 24, 14, 36, 14, 38, 14, 39, 14,
82+
85, 14, 92, 14, 20, 33, 78, 89, 82, 91, 94, 96, 20, 34, 78, 34, 81, 90, 82,
83+
34, 94, 34, 20, 35, 78, 35, 81, 35, 82, 35, 94, 35,
8384
}
8485

8586
var tmRuleLen = []int8{
8687
2, 1, 1, 1, 1, 5, 4, 4, 3, 3, 2, 1, 3, 1, 4, 4, 2, 4, 4, 8, 1, 1, 1, 1, 1, 1,
87-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0,
88-
0, 3, 1, 3, 4, 1, 1, 4, 6, 3, 1, 3, 1, 1,
88+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
89+
2, 0, 0, 3, 1, 3, 4, 1, 1, 4, 6, 3, 1, 3, 1, 1,
8990
}
9091

9192
var tmRuleSymbol = []int32{
92-
33, 33, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
93-
35, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
94-
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 37, 37, 38, 39, 40, 40, 41,
95-
42, 42, 43, 43, 44, 44, 45, 45, 46,
93+
35, 35, 36, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
94+
37, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
95+
38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 40, 41, 42,
96+
42, 43, 44, 44, 45, 45, 46, 46, 47, 47, 48,
9697
}
9798

9899
var tmRuleType = [...]uint32{
@@ -134,11 +135,13 @@ var tmRuleType = [...]uint32{
134135
0, // setof_not_EOI_or_DOT_or_RBRACE : ')'
135136
0, // setof_not_EOI_or_DOT_or_RBRACE : '['
136137
0, // setof_not_EOI_or_DOT_or_RBRACE : ']'
138+
0, // setof_not_EOI_or_DOT_or_RBRACE : '...'
137139
0, // setof_not_EOI_or_DOT_or_RBRACE : ','
138140
0, // setof_not_EOI_or_DOT_or_RBRACE : ':'
139141
0, // setof_not_EOI_or_DOT_or_RBRACE : '-'
140142
0, // setof_not_EOI_or_DOT_or_RBRACE : '->'
141143
0, // setof_not_EOI_or_DOT_or_RBRACE : '+'
144+
0, // setof_not_EOI_or_DOT_or_RBRACE : multiline
142145
0, // setof_not_EOI_or_DOT_or_RBRACE : dquote
143146
0, // setof_not_EOI_or_DOT_or_RBRACE : squote
144147
0, // setof_not_EOI_or_DOT_or_RBRACE : SharpAtID

tm-go/parsers/test/parser_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ var parseTests = []struct {
9090
decl1(a)`,
9191
}},
9292
{test.InvalidToken, 0, []string{
93-
` decl2 «%» `,
93+
` decl2 «%»`,
9494
}},
9595
{test.Identifier, 0, []string{
9696
` decl1(«abc».«def1») `,

0 commit comments

Comments
 (0)