
Commit bf14f7f

Refactor lexer
1 parent bf58593 commit bf14f7f

6 files changed: +95 -84 lines changed

file/error.go (+17 -3)

@@ -8,6 +8,8 @@ import (
 
 type Error struct {
 	Location
+	Line    int    `json:"line"`
+	Column  int    `json:"column"`
 	Message string `json:"message"`
 	Snippet string `json:"snippet"`
 	Prev    error  `json:"prev"`
@@ -18,12 +20,24 @@ func (e *Error) Error() string {
 }
 
 func (e *Error) Bind(source Source) *Error {
-	if snippet, found := source.Snippet(e.Location.Line); found {
+	e.Line = 1
+	for i, r := range source {
+		if i == e.From {
+			break
+		}
+		if r == '\n' {
+			e.Line++
+			e.Column = 0
+		} else {
+			e.Column++
+		}
+	}
+	if snippet, found := source.Snippet(e.Line); found {
 		snippet := strings.Replace(snippet, "\t", " ", -1)
 		srcLine := "\n | " + snippet
 		var bytes = []byte(snippet)
 		var indLine = "\n | "
-		for i := 0; i < e.Location.Column && len(bytes) > 0; i++ {
+		for i := 0; i < e.Column && len(bytes) > 0; i++ {
 			_, sz := utf8.DecodeRune(bytes)
 			bytes = bytes[sz:]
 			if sz > 1 {
@@ -54,7 +68,7 @@ func (e *Error) Wrap(err error) {
 }
 
 func (e *Error) format() string {
-	if e.Location.Empty() {
+	if e.Snippet == "" {
 		return e.Message
 	}
 	return fmt.Sprintf(
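
With Line and Column dropped from Location (see file/location.go below), Bind now recomputes them on demand by walking the source up to the error's From offset. Below is a minimal standalone sketch of that computation; it assumes the source is a plain []rune and leaves out the real file.Source and snippet handling:

package main

import "fmt"

// lineColumn mirrors the loop added to Error.Bind: walk the source rune by
// rune up to offset `from`, counting newlines. Line is 1-based, column is
// 0-based, matching the struct fields above.
func lineColumn(src []rune, from int) (line, col int) {
    line = 1
    for i, r := range src {
        if i == from {
            break
        }
        if r == '\n' {
            line++
            col = 0
        } else {
            col++
        }
    }
    return line, col
}

func main() {
    src := []rune("1..2\n3..4")
    line, col := lineColumn(src, 5)
    fmt.Println(line, col) // 2 0: offset 5 is the "3" at the start of line 2
}

The scan is linear in the size of the source, but it only runs when an error is actually bound to a source for reporting.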

file/location.go (+2 -8)

@@ -1,12 +1,6 @@
 package file
 
 type Location struct {
-	Line   int `json:"line"`   // The 1-based line of the location.
-	Column int `json:"column"` // The 0-based column number of the location.
-	From   int `json:"from"`   // The 0-based byte offset from the beginning.
-	To     int `json:"to"`     // The 0-based byte offset to the end.
-}
-
-func (l Location) Empty() bool {
-	return l.Column == 0 && l.Line == 0
+	From int `json:"from"`
+	To   int `json:"to"`
 }
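
Location is now nothing more than a half-open [From, To) range of offsets into the source. As a quick illustration (a hypothetical helper, not part of the commit), such a range maps back to text by slicing, assuming From and To are rune offsets the way the lexer below fills them in:

package main

import "fmt"

// Location mirrors the trimmed struct above.
type Location struct {
    From int `json:"from"`
    To   int `json:"to"`
}

// textAt is a hypothetical helper: recover the text a Location points at,
// assuming From/To are rune offsets into src.
func textAt(src []rune, loc Location) string {
    return string(src[loc.From:loc.To])
}

func main() {
    src := []rune("1..2\n3..4")
    fmt.Println(textAt(src, Location{From: 6, To: 8})) // ".."
}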

parser/lexer/lexer.go (+44 -47)

@@ -3,20 +3,18 @@ package lexer
 import (
 	"fmt"
 	"strings"
-	"unicode/utf8"
 
 	"github.com/expr-lang/expr/file"
 )
 
 func Lex(source file.Source) ([]Token, error) {
 	l := &lexer{
-		input:  source.String(),
+		source: source,
 		tokens: make([]Token, 0),
+		start:  0,
+		end:    0,
 	}
-
-	l.loc = file.Location{Line: 1, Column: 0}
-	l.prev = l.loc
-	l.startLoc = l.loc
+	l.commit()
 
 	for state := root; state != nil; {
 		state = state(l)
@@ -30,34 +28,27 @@ func Lex(source file.Source) ([]Token, error) {
 }
 
 type lexer struct {
-	input      string
+	source     file.Source
 	tokens     []Token
-	start, end int           // current position in input
-	width      int           // last rune width
-	startLoc   file.Location // start location
-	prev, loc  file.Location // prev location of end location, end location
+	backed     bool
+	start, end int
 	err        *file.Error
 }
 
 const eof rune = -1
 
+func (l *lexer) commit() {
+	l.start = l.end
+}
+
 func (l *lexer) next() rune {
-	if l.end >= len(l.input) {
-		l.width = 0
+	l.backed = false
+	if l.end >= len(l.source) {
+		l.end++
 		return eof
 	}
-	r, w := utf8.DecodeRuneInString(l.input[l.end:])
-	l.width = w
-	l.end += w
-
-	l.prev = l.loc
-	if r == '\n' {
-		l.loc.Line++
-		l.loc.Column = 0
-	} else {
-		l.loc.Column++
-	}
-
+	r := l.source[l.end]
+	l.end++
 	return r
 }
 
@@ -68,8 +59,11 @@ func (l *lexer) peek() rune {
 }
 
 func (l *lexer) backup() {
-	l.end -= l.width
-	l.loc = l.prev
+	if l.backed {
+		panic(fmt.Sprintf("lexer: cannot backup twice: %q", l.source[l.start:l.end]))
+	}
+	l.backed = true
+	l.end--
 }
 
 func (l *lexer) emit(t Kind) {
@@ -78,35 +72,35 @@ func (l *lexer) emit(t Kind) {
 
 func (l *lexer) emitValue(t Kind, value string) {
 	l.tokens = append(l.tokens, Token{
-		Location: l.startLoc,
+		Location: file.Location{From: l.start, To: l.end},
 		Kind:     t,
 		Value:    value,
 	})
-	l.start = l.end
-	l.startLoc = l.loc
+	l.commit()
 }
 
 func (l *lexer) emitEOF() {
+	from := l.end - 2
+	if from < 0 {
+		from = 0
+	}
+	to := l.end - 1
+	if to < 0 {
+		to = 0
+	}
 	l.tokens = append(l.tokens, Token{
-		Location: l.prev, // Point to previous position for better error messages.
+		Location: file.Location{From: from, To: to},
 		Kind:     EOF,
 	})
-	l.start = l.end
-	l.startLoc = l.loc
+	l.commit()
 }
 
 func (l *lexer) skip() {
-	l.start = l.end
-	l.startLoc = l.loc
+	l.commit()
 }
 
 func (l *lexer) word() string {
-	return l.input[l.start:l.end]
-}
-
-func (l *lexer) ignore() {
-	l.start = l.end
-	l.startLoc = l.loc
+	return string(l.source[l.start:l.end])
 }
 
 func (l *lexer) accept(valid string) bool {
@@ -132,18 +126,18 @@ func (l *lexer) skipSpaces() {
 }
 
 func (l *lexer) acceptWord(word string) bool {
-	pos, loc, prev := l.end, l.loc, l.prev
+	pos := l.end
 
 	l.skipSpaces()
 
 	for _, ch := range word {
 		if l.next() != ch {
-			l.end, l.loc, l.prev = pos, loc, prev
+			l.end = pos
 			return false
 		}
 	}
 	if r := l.peek(); r != ' ' && r != eof {
-		l.end, l.loc, l.prev = pos, loc, prev
+		l.end = pos
 		return false
 	}
 
@@ -153,8 +147,11 @@ func (l *lexer) acceptWord(word string) bool {
 func (l *lexer) error(format string, args ...any) stateFn {
 	if l.err == nil { // show first error
 		l.err = &file.Error{
-			Location: l.loc,
-			Message:  fmt.Sprintf(format, args...),
+			Location: file.Location{
+				From: l.end - 1,
+				To:   l.end,
+			},
+			Message: fmt.Sprintf(format, args...),
 		}
 	}
 	return nil
@@ -230,6 +227,6 @@ func (l *lexer) scanRawString(quote rune) (n int) {
 		ch = l.next()
 		n++
 	}
-	l.emitValue(String, l.input[l.start+1:l.end-1])
+	l.emitValue(String, string(l.source[l.start+1:l.end-1]))
 	return
 }
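
The refactored lexer keeps no line/column state at all: just rune offsets start and end, a backed flag that permits exactly one backup, and commit() to advance the token start. A condensed, self-contained sketch of that cursor discipline follows (names simplified; this is not the full lexer):

package main

import "fmt"

const eof rune = -1

// cursor is a stripped-down model of the refactored lexer's position
// tracking: rune offsets only, a one-level backup, and commit().
type cursor struct {
    src        []rune
    start, end int
    backed     bool
}

func (c *cursor) next() rune {
    c.backed = false
    if c.end >= len(c.src) {
        c.end++ // advance even at EOF so a following backup() stays symmetric
        return eof
    }
    r := c.src[c.end]
    c.end++
    return r
}

func (c *cursor) backup() {
    if c.backed {
        panic("cannot backup twice")
    }
    c.backed = true
    c.end--
}

func (c *cursor) commit()      { c.start = c.end }
func (c *cursor) word() string { return string(c.src[c.start:c.end]) }

func main() {
    c := &cursor{src: []rune("ab")}
    c.next()
    c.next()
    fmt.Println(c.word()) // "ab"
    c.backup()
    fmt.Println(c.word()) // "a"
    c.commit()
    fmt.Println(c.start, c.end) // 1 1
}

The backed flag and the panic in backup() replace the old width/prev bookkeeping while still catching the double-backup misuse that the comment in scanNumber warns about.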

parser/lexer/lexer_test.go (+19 -13)

@@ -5,10 +5,9 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/expr-lang/expr/file"
 	"github.com/expr-lang/expr/internal/testify/assert"
 	"github.com/expr-lang/expr/internal/testify/require"
-
-	"github.com/expr-lang/expr/file"
 	. "github.com/expr-lang/expr/parser/lexer"
 )
 
@@ -17,6 +16,13 @@ func TestLex(t *testing.T) {
 		input  string
 		tokens []Token
 	}{
+		{
+			"1",
+			[]Token{
+				{Kind: Number, Value: "1"},
+				{Kind: EOF},
+			},
+		},
 		{
 			".5 0.025 1 02 1e3 0xFF 0b0101 0o600 1.2e-4 1_000_000 _42 -.5",
 			[]Token{
@@ -265,33 +271,33 @@ func compareTokens(i1, i2 []Token) bool {
 }
 
 func TestLex_location(t *testing.T) {
-	source := file.NewSource("1..2 3..4")
+	source := file.NewSource("1..2\n3..4")
 	tokens, err := Lex(source)
 	require.NoError(t, err)
 	require.Equal(t, []Token{
-		{Location: file.Location{Line: 1, Column: 0}, Kind: Number, Value: "1"},
-		{Location: file.Location{Line: 1, Column: 1}, Kind: Operator, Value: ".."},
-		{Location: file.Location{Line: 1, Column: 3}, Kind: Number, Value: "2"},
-		{Location: file.Location{Line: 1, Column: 5}, Kind: Number, Value: "3"},
-		{Location: file.Location{Line: 1, Column: 6}, Kind: Operator, Value: ".."},
-		{Location: file.Location{Line: 1, Column: 8}, Kind: Number, Value: "4"},
-		{Location: file.Location{Line: 1, Column: 8}, Kind: EOF, Value: ""},
+		{Location: file.Location{From: 0, To: 1}, Kind: "Number", Value: "1"},
+		{Location: file.Location{From: 1, To: 3}, Kind: "Operator", Value: ".."},
+		{Location: file.Location{From: 3, To: 4}, Kind: "Number", Value: "2"},
+		{Location: file.Location{From: 5, To: 6}, Kind: "Number", Value: "3"},
+		{Location: file.Location{From: 6, To: 8}, Kind: "Operator", Value: ".."},
+		{Location: file.Location{From: 8, To: 9}, Kind: "Number", Value: "4"},
+		{Location: file.Location{From: 8, To: 9}, Kind: "EOF", Value: ""},
 	}, tokens)
 }
 
 const errorTests = `
 "\xQA"
-invalid char escape (1:5)
+invalid char escape (1:4)
 | "\xQA"
-| ....^
+| ...^
 
 id "hello
 literal not terminated (1:10)
 | id "hello
 | .........^
 
 früh ♥︎
-unrecognized character: U+2665 '♥' (1:7)
+unrecognized character: U+2665 '♥' (1:6)
 | früh ♥︎
 `
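
TestLex_location now asserts From/To rune offsets over a two-line input instead of line/column pairs. As a sanity check (not part of the test file), the expected ranges do slice out the corresponding token text from "1..2\n3..4":

package main

import "fmt"

func main() {
    src := []rune("1..2\n3..4") // same input as TestLex_location above
    // From/To pairs copied from the expected non-EOF tokens in the test.
    ranges := [][2]int{{0, 1}, {1, 3}, {3, 4}, {5, 6}, {6, 8}, {8, 9}}
    for _, r := range ranges {
        fmt.Printf("%d..%d => %q\n", r[0], r[1], string(src[r[0]:r[1]]))
    }
}

The shifted columns in errorTests ((1:4) instead of (1:5), (1:6) instead of (1:7)) appear to follow from the error location now being taken as the rune just before l.end rather than the already-advanced column counter.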

parser/lexer/state.go (+7 -7)

@@ -14,7 +14,7 @@ func root(l *lexer) stateFn {
 		l.emitEOF()
 		return nil
 	case utils.IsSpace(r):
-		l.ignore()
+		l.skip()
 		return root
 	case r == '\'' || r == '"':
 		l.scanString(r)
@@ -83,14 +83,14 @@ func (l *lexer) scanNumber() bool {
 		}
 	}
 	l.acceptRun(digits)
-	loc, prev, end := l.loc, l.prev, l.end
+	end := l.end
 	if l.accept(".") {
 		// Lookup for .. operator: if after dot there is another dot (1..2), it maybe a range operator.
 		if l.peek() == '.' {
 			// We can't backup() here, as it would require two backups,
 			// and backup() func supports only one for now. So, save and
 			// restore it here.
-			l.loc, l.prev, l.end = loc, prev, end
+			l.end = end
 			return true
 		}
 		l.acceptRun(digits)
@@ -147,7 +147,7 @@ func not(l *lexer) stateFn {
 
 	l.skipSpaces()
 
-	pos, loc, prev := l.end, l.loc, l.prev
+	end := l.end
 
 	// Get the next word.
 	for {
@@ -164,7 +164,7 @@ func not(l *lexer) stateFn {
 	case "in", "matches", "contains", "startsWith", "endsWith":
 		l.emit(Operator)
 	default:
-		l.end, l.loc, l.prev = pos, loc, prev
+		l.end = end
 	}
 	return root
 }
@@ -193,7 +193,7 @@ func singleLineComment(l *lexer) stateFn {
 			break
 		}
 	}
-	l.ignore()
+	l.skip()
 	return root
 }
 
@@ -207,7 +207,7 @@ func multiLineComment(l *lexer) stateFn {
 			break
 		}
 	}
-	l.ignore()
+	l.skip()
 	return root
 }
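
All of the save/restore sites in state.go used to snapshot pos, loc, and prev together; with only rune offsets left, a checkpoint is a single integer. A self-contained sketch of that pattern (the function name and signature are illustrative, not the repository's API):

package main

import "fmt"

// acceptExact tries to consume `word` at *end and restores the position on
// failure, using the same single-integer checkpoint that scanNumber and not()
// use after this commit. Hypothetical helper for illustration only.
func acceptExact(src []rune, end *int, word string) bool {
    saved := *end // checkpoint: one int instead of (pos, loc, prev)
    for _, ch := range word {
        if *end >= len(src) || src[*end] != ch {
            *end = saved // restore
            return false
        }
        *end++
    }
    return true
}

func main() {
    src := []rune("not in list")
    end := 4
    fmt.Println(acceptExact(src, &end, "in"), end) // true 6
    fmt.Println(acceptExact(src, &end, "of"), end) // false 6
}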
