Skip to content

Commit

Permalink
Always emit() nodes with zero children, don't bump() them (#371)
Browse files Browse the repository at this point in the history
Clean up weird cases where the parser would `bump()` interior nodes into
the output stream rather than `emit()`ing them. Emitting all interior
nodes explicitly means we can remove some special cases which occurred
during tree building.

As part of this, fix some errors converting broken expressions like
`x var"y"` from `SyntaxNode` to `Expr`.
  • Loading branch information
c42f authored Oct 22, 2023
1 parent 96137ed commit 1b048aa
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 23 deletions.
6 changes: 4 additions & 2 deletions src/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,10 @@ end
# Convert internal node of the JuliaSyntax parse tree to an Expr
function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args)
k = kind(head)
if k == K"var" || k == K"char"
@check length(args) == 1
if (k == K"var" || k == K"char") && length(args) == 1
# Ideally we'd like `@check length(args) == 1` as an invariant for all
# K"var" and K"char" nodes, but this discounts having embedded error
# nodes when ignore_errors=true is set.
return args[1]
elseif k == K"string" || k == K"cmdstring"
return _string_to_Expr(k, args)
Expand Down
21 changes: 15 additions & 6 deletions src/literal_parsing.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
"""
Nontrivia tokens (leaf nodes / literals) which are malformed are parsed into
ErrorVal when `ignore_errors=true` during parsing.
"""
struct ErrorVal
end

# Render as a visible light-red marker; printing an empty string would make an
# `ErrorVal` embedded in displayed output silently disappear.
Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red)

#-------------------------------------------------------------------------------
# This file contains utility functions for converting undecorated source
# strings into Julia values. For example, string->number, string unescaping, etc.
Expand Down Expand Up @@ -364,7 +373,6 @@ end

#-------------------------------------------------------------------------------
function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
# Leaf node
k = kind(head)
# Any errors parsing literals are represented as ErrorVal() - this can
# happen when the user sets `ignore_errors=true` during parsing.
Expand Down Expand Up @@ -404,6 +412,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
return false
end

# TODO: Avoid allocating temporary String here
val_str = String(txtbuf[srcrange])
if k == K"Integer"
parse_int_literal(val_str)
Expand All @@ -417,9 +426,6 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
else
Symbol(normalize_identifier(val_str))
end
elseif is_keyword(k)
# This should only happen for tokens nested inside errors
Symbol(val_str)
elseif is_operator(k)
isempty(srcrange) ?
Symbol(untokenize(k)) : # synthetic invisible tokens
Expand All @@ -436,9 +442,12 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
Symbol("core_@cmd")
elseif is_syntax_kind(head)
nothing
elseif is_keyword(k)
# This should only happen for tokens nested inside errors
Symbol(val_str)
else
# FIXME: this allows us to recover from trivia is_error nodes
# that we insert below
# Other kinds should only happen for tokens nested inside errors
# TODO: Consolidate this with the is_keyword() above? Something else?
ErrorVal()
end
end
Expand Down
6 changes: 2 additions & 4 deletions src/parse_stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1021,7 +1021,7 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream;
while true
last_token = j <= lastindex(ranges) ?
ranges[j].last_token : lastindex(tokens)
# Process tokens to nodes for all tokens used by the next internal node
# Process tokens to leaf nodes for all tokens used by the next internal node
while i <= last_token
t = tokens[i]
if kind(t) == K"TOMBSTONE"
Expand All @@ -1031,9 +1031,7 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream;
srcrange = (stream.tokens[i-1].next_byte:
stream.tokens[i].next_byte - 1)
h = head(t)
children = (is_syntax_kind(h) || is_keyword(h)) ?
(stack[n].node for n=1:0) : nothing
node = make_node(h, srcrange, children)
node = make_node(h, srcrange, nothing)
if !isnothing(node)
push!(stack, (first_token=i, node=node))
end
Expand Down
3 changes: 2 additions & 1 deletion src/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1955,7 +1955,8 @@ function parse_resword(ps::ParseState)
elseif word in KSet"break continue"
# break ==> (break)
# continue ==> (continue)
bump(ps)
bump(ps, TRIVIA_FLAG)
emit(ps, mark, word)
k = peek(ps)
if !(k in KSet"NewlineWs ; ) : EndMarker" || (k == K"end" && !ps.end_symbol))
recover(is_closer_or_newline, ps, TRIVIA_FLAG,
Expand Down
8 changes: 1 addition & 7 deletions src/syntax_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,6 @@ text by calling one of the parser API functions such as [`parseall`](@ref)
"""
const SyntaxNode = TreeNode{SyntaxData}

# Value of an error node with no children
struct ErrorVal
end

# Render as a visible light-red marker rather than an empty string, so the
# value does not vanish from displayed output.
Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red)

function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead};
keep_parens=false, position::Integer=1)
GC.@preserve source begin
Expand All @@ -71,7 +65,7 @@ end
function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int,
raw::GreenNode{SyntaxHead},
position::Int, keep_parens::Bool)
if !haschildren(raw) && !(is_syntax_kind(raw) || is_keyword(raw))
if !haschildren(raw)
# Here we parse the values eagerly rather than representing them as
# strings. Maybe this is good. Maybe not.
valrange = position:position + span(raw) - 1
Expand Down
6 changes: 5 additions & 1 deletion test/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,11 @@
@test parsestmt("(x", ignore_errors=true) ==
Expr(:block, :x, Expr(:error))
@test parsestmt("x do", ignore_errors=true) ==
Expr(:block, :x, Expr(:error, Expr(:do_lambda)))
Expr(:block, :x, Expr(:error, :do))
@test parsestmt("x var\"y\"", ignore_errors=true) ==
Expr(:block, :x, Expr(:error, :var, ErrorVal(), "y", ErrorVal()))
@test parsestmt("var\"y", ignore_errors=true) ==
Expr(:var, :y, Expr(:error))
end

@testset "import" begin
Expand Down
4 changes: 2 additions & 2 deletions test/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ tests = [
((v=v"1.8",), "struct A const a end") => "(struct A (block (const a)))"
((v=v"1.7",), "struct A const a end") => "(struct A (block (error (const a))))"
"struct A end" => "(struct A (block))"
"struct try end" => "(struct (error (try)) (block))"
"struct try end" => "(struct (error try) (block))"
# return
"return\nx" => "(return)"
"return)" => "(return)"
Expand All @@ -503,7 +503,7 @@ tests = [
# module/baremodule
"module A end" => "(module A (block))"
"baremodule A end" => "(module-bare A (block))"
"module do \n end" => "(module (error (do)) (block))"
"module do \n end" => "(module (error do) (block))"
"module \$A end" => "(module (\$ A) (block))"
"module A \n a \n b \n end" => "(module A (block a b))"
"""module A \n "x"\na\n end""" => """(module A (block (doc (string "x") a)))"""
Expand Down
1 change: 1 addition & 0 deletions test/test_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ using .JuliaSyntax:
# Nodes
GreenNode,
SyntaxNode,
ErrorVal,
# Node inspection
kind,
flags,
Expand Down

0 comments on commit 1b048aa

Please sign in to comment.