ascii fast path
stevengj committed Oct 27, 2023
1 parent d22e2c7 commit 584d363
Showing 1 changed file with 10 additions and 2 deletions.
src/tokenize.jl
@@ -1284,14 +1284,22 @@ function lex_backtick(l::Lexer)
 end
 
 const MAX_KW_LENGTH = 10
+const ascii_is_identifier_char::Vector{Bool} = map(is_identifier_char ∘ Char, 0x00:0x7f)
 function lex_identifier(l::Lexer, c)
     h = simple_hash(c, UInt64(0))
     n = 1
-    graphemestate = Ref(zero(Int32))
+    ascii = isascii(c)
+    graphemestate = Ref(Int32(ascii)) # all ASCII id chars are UTF8PROC_BOUNDCLASS_OTHER
     graphemestate_peek = Ref(zero(Int32))
     while true
         pc, ppc = dpeekchar(l)
-        if Unicode.isgraphemebreak!(graphemestate, c, pc)
+        ascii = ascii && isascii(pc)
+        if ascii # fast path
+            pc_byte = pc % UInt8
+            @inbounds if (pc_byte == UInt8('!') && ppc == '=') || !ascii_is_identifier_char[pc_byte+1]
+                break
+            end
+        elseif Unicode.isgraphemebreak!(graphemestate, c, pc)
             if (pc == '!' && ppc == '=') || !is_identifier_char(pc)
                 break
             end
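A minimal standalone sketch of the same technique, outside the diff context. The names (my_is_identifier_char, MY_ASCII_ID_CHAR, my_fast_id_char) are hypothetical stand-ins for the lexer's internals, and the predicate is a simplified assumption, not the lexer's actual is_identifier_char:

    # Stand-in predicate; the real is_identifier_char consults Unicode
    # character categories.
    my_is_identifier_char(c::Char) = c == '_' || isletter(c) || isdigit(c)

    # Precompute one Bool per ASCII code point 0x00:0x7f, so the hot loop
    # pays for a single array lookup instead of a Unicode property query.
    const MY_ASCII_ID_CHAR = map(my_is_identifier_char ∘ Char, 0x00:0x7f)

    function my_fast_id_char(c::Char)
        if isascii(c)
            # fast path: byte value + 1, because Julia arrays are 1-based
            return @inbounds MY_ASCII_ID_CHAR[(c % UInt8) + 1]
        else
            # slow path for non-ASCII characters
            return my_is_identifier_char(c)
        end
    end

In the commit itself the ASCII branch additionally special-cases '!' followed by '=', since != begins an operator token rather than continuing an identifier, and it carries a running ascii flag so the grapheme-break state machine can be skipped entirely while the input stays ASCII.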
