Skip to content

Commit 509a485

Browse files
committed
Stop using pop_grapheme in the lexer to make it faster
1 parent 7884fca commit 509a485

File tree

2 files changed

+98
-49
lines changed

2 files changed

+98
-49
lines changed

eyg/src/eyg/parse/lexer.gleam

+97-48
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import eyg/parse/token as t
2-
import gleam/bit_array
32
import gleam/list
43
import gleam/string
4+
import gleam/stringx
55

66
pub fn lex(raw) {
77
loop(raw, 0, [])
@@ -50,7 +50,7 @@ fn pop(raw, start) {
5050
"shallow" <> rest -> done(t.Shallow, 7, rest)
5151
"handle" <> rest -> done(t.Handle, 6, rest)
5252

53-
"\"" <> rest -> string("", rest, done)
53+
"\"" <> rest -> string("", 1, rest, done)
5454

5555
"1" <> rest -> integer("1", rest, done)
5656
"2" <> rest -> integer("2", rest, done)
@@ -63,63 +63,116 @@ fn pop(raw, start) {
6363
"9" <> rest -> integer("9", rest, done)
6464
"0" <> rest -> integer("0", rest, done)
6565
_ -> {
66-
case string.pop_grapheme(raw) {
67-
Ok(#(g, rest)) ->
68-
case is_lower_grapheme(g) || g == "_" {
69-
True -> name(g, rest, done)
70-
False ->
71-
case is_upper_grapheme(g) {
72-
True -> uppername(g, rest, done)
73-
False -> done(t.UnexpectedGrapheme(g), byte_size(g), rest)
74-
}
75-
}
76-
Error(Nil) -> Error(Nil)
66+
let next_byte = stringx.byte_slice_range(raw, 0, 1)
67+
let rest = stringx.byte_slice_from(raw, 1)
68+
case next_byte {
69+
"_"
70+
| "a"
71+
| "b"
72+
| "c"
73+
| "d"
74+
| "e"
75+
| "f"
76+
| "g"
77+
| "h"
78+
| "i"
79+
| "j"
80+
| "k"
81+
| "l"
82+
| "m"
83+
| "n"
84+
| "o"
85+
| "p"
86+
| "q"
87+
| "r"
88+
| "s"
89+
| "t"
90+
| "u"
91+
| "v"
92+
| "w"
93+
| "x"
94+
| "y"
95+
| "z" -> name(next_byte, rest, done)
96+
"A"
97+
| "B"
98+
| "C"
99+
| "D"
100+
| "E"
101+
| "F"
102+
| "G"
103+
| "H"
104+
| "I"
105+
| "J"
106+
| "K"
107+
| "L"
108+
| "M"
109+
| "N"
110+
| "O"
111+
| "P"
112+
| "Q"
113+
| "R"
114+
| "S"
115+
| "T"
116+
| "U"
117+
| "V"
118+
| "W"
119+
| "X"
120+
| "Y"
121+
| "Z" -> uppername(next_byte, rest, done)
122+
"" -> Error(Nil)
123+
_ -> done(t.UnexpectedGrapheme(raw), string.byte_size(raw), "")
77124
}
78125
}
79126
}
80127
}
81128

82-
fn string(buffer, rest, done) {
129+
fn string(buffer, length, rest, done) {
83130
case rest {
84-
"\"" <> rest -> done(t.String(buffer), byte_size(buffer) + 2, rest)
131+
"\"" <> rest -> done(t.String(buffer), length + 1, rest)
85132
"\\" <> rest ->
86-
case string.pop_grapheme(rest) {
87-
Ok(#(g, rest)) -> string(buffer <> "\\" <> g, rest, done)
88-
Error(Nil) -> string(buffer <> "\\", rest, done)
89-
}
90-
_ ->
91-
case string.pop_grapheme(rest) {
92-
Ok(#(g, rest)) -> string(buffer <> g, rest, done)
93-
Error(Nil) ->
94-
done(t.UnterminatedString(buffer), byte_size(buffer) + 1, "")
133+
case rest {
134+
"\"" <> rest -> string(buffer <> "\"", length + 2, rest, done)
135+
"\\" <> rest -> string(buffer <> "\\", length + 2, rest, done)
136+
"t" <> rest -> string(buffer <> "\t", length + 2, rest, done)
137+
"r" <> rest -> string(buffer <> "\r", length + 2, rest, done)
138+
"n" <> rest -> string(buffer <> "\n", length + 2, rest, done)
139+
"" -> done(t.UnterminatedString(buffer <> "\\"), length, "")
140+
_ -> todo as "invalid escape"
95141
}
142+
"" -> done(t.UnterminatedString(buffer), length, "")
143+
_ -> {
144+
let next_byte = stringx.byte_slice_range(rest, 0, 1)
145+
let rest = stringx.byte_slice_from(rest, 1)
146+
string(buffer <> next_byte, length + 1, rest, done)
147+
}
96148
}
97149
}
98150

99151
fn name(buffer, raw, done) {
100-
case string.pop_grapheme(raw) {
101-
Ok(#(g, rest)) ->
102-
case is_lower_grapheme(g) || is_digit_grapheme(g) || g == "_" {
103-
True -> name(buffer <> g, rest, done)
104-
False -> done(t.Name(buffer), byte_size(buffer), raw)
105-
}
106-
Error(Nil) -> done(t.Name(buffer), byte_size(buffer), raw)
152+
let next_byte = stringx.byte_slice_range(raw, 0, 1)
153+
let rest = stringx.byte_slice_from(raw, 1)
154+
case
155+
is_lower_grapheme(next_byte)
156+
|| is_digit_grapheme(next_byte)
157+
|| next_byte == "_"
158+
{
159+
True -> name(buffer <> next_byte, rest, done)
160+
False -> done(t.Name(buffer), string.byte_size(buffer), raw)
107161
}
108162
}
109163

110164
fn uppername(buffer, raw, done) {
111-
case string.pop_grapheme(raw) {
112-
Ok(#(g, rest)) ->
113-
case
114-
is_upper_grapheme(g)
115-
|| is_lower_grapheme(g)
116-
|| is_digit_grapheme(g)
117-
|| g == "_"
118-
{
119-
True -> uppername(buffer <> g, rest, done)
120-
False -> done(t.Uppername(buffer), byte_size(buffer), raw)
121-
}
122-
Error(Nil) -> done(t.Uppername(buffer), byte_size(buffer), raw)
165+
let next_byte = stringx.byte_slice_range(raw, 0, 1)
166+
let rest = stringx.byte_slice_from(raw, 1)
167+
168+
case
169+
is_upper_grapheme(next_byte)
170+
|| is_lower_grapheme(next_byte)
171+
|| is_digit_grapheme(next_byte)
172+
|| next_byte == "_"
173+
{
174+
True -> uppername(buffer <> next_byte, rest, done)
175+
False -> done(t.Uppername(buffer), string.byte_size(buffer), raw)
123176
}
124177
}
125178

@@ -135,14 +188,10 @@ fn integer(buffer, rest, done) {
135188
"8" <> rest -> integer(buffer <> "8", rest, done)
136189
"9" <> rest -> integer(buffer <> "9", rest, done)
137190
"0" <> rest -> integer(buffer <> "0", rest, done)
138-
_ -> done(t.Integer(buffer), byte_size(buffer), rest)
191+
_ -> done(t.Integer(buffer), string.byte_size(buffer), rest)
139192
}
140193
}
141194

142-
fn byte_size(string: String) -> Int {
143-
bit_array.byte_size(<<string:utf8>>)
144-
}
145-
146195
fn is_lower_grapheme(grapheme) {
147196
case grapheme {
148197
"a"

eyg/test/eyg/parser/lexer_test.gleam

+1-1
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ pub fn string_test() {
4646
|> should.equal([
4747
#(t.String(""), 0),
4848
#(t.String("hello"), 3),
49-
#(t.String("\\\\"), 11),
49+
#(t.String("\\"), 11),
5050
])
5151
}
5252

0 commit comments

Comments
 (0)