From 1b048aad2a3a3e2e9d7f5053ff51024f08ca308c Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 22 Oct 2023 16:28:17 +1000 Subject: [PATCH] Always `emit()` nodes with zero children, don't `bump()` them (#371) Clean up weird cases where the parser would `bump()` interior nodes into the output stream rather than `emit()`ing them. Emitting all interior nodes explicitly means we can remove some special cases which occurred during tree building. As part of this, fix some errors converting broken expressions like `x var"y"` from `SyntaxNode` to `Expr`. --- src/expr.jl | 6 ++++-- src/literal_parsing.jl | 21 +++++++++++++++------ src/parse_stream.jl | 6 ++---- src/parser.jl | 3 ++- src/syntax_tree.jl | 8 +------- test/expr.jl | 6 +++++- test/parser.jl | 4 ++-- test/test_utils.jl | 1 + 8 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/expr.jl b/src/expr.jl index 64692eb7..1dbfc4cd 100644 --- a/src/expr.jl +++ b/src/expr.jl @@ -196,8 +196,10 @@ end # Convert internal node of the JuliaSyntax parse tree to an Expr function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) k = kind(head) - if k == K"var" || k == K"char" - @check length(args) == 1 + if (k == K"var" || k == K"char") && length(args) == 1 + # Ideally we'd like `@check length(args) == 1` as an invariant for all + # K"var" and K"char" nodes, but this discounts having embedded error + # nodes when ignore_errors=true is set. return args[1] elseif k == K"string" || k == K"cmdstring" return _string_to_Expr(k, args) diff --git a/src/literal_parsing.jl b/src/literal_parsing.jl index 5cd610ce..a027985a 100644 --- a/src/literal_parsing.jl +++ b/src/literal_parsing.jl @@ -1,3 +1,12 @@ +""" +Nontrivia tokens (leaf nodes / literals) which are malformed are parsed into +ErrorVal when `ignore_errors=true` during parsing. 
+""" +struct ErrorVal +end + +Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) + #------------------------------------------------------------------------------- # This file contains utility functions for converting undecorated source # strings into Julia values. For example, string->number, string unescaping, etc. @@ -364,7 +373,6 @@ end #------------------------------------------------------------------------------- function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) - # Leaf node k = kind(head) # Any errors parsing literals are represented as ErrorVal() - this can # happen when the user sets `ignore_errors=true` during parsing. @@ -404,6 +412,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) return false end + # TODO: Avoid allocating temporary String here val_str = String(txtbuf[srcrange]) if k == K"Integer" parse_int_literal(val_str) @@ -417,9 +426,6 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) else Symbol(normalize_identifier(val_str)) end - elseif is_keyword(k) - # This should only happen for tokens nested inside errors - Symbol(val_str) elseif is_operator(k) isempty(srcrange) ? Symbol(untokenize(k)) : # synthetic invisible tokens @@ -436,9 +442,12 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) Symbol("core_@cmd") elseif is_syntax_kind(head) nothing + elseif is_keyword(k) + # This should only happen for tokens nested inside errors + Symbol(val_str) else - # FIXME: this allows us to recover from trivia is_error nodes - # that we insert below + # Other kinds should only happen for tokens nested inside errors + # TODO: Consolidate this with the is_keyword() above? Something else? 
ErrorVal() end end diff --git a/src/parse_stream.jl b/src/parse_stream.jl index 78571d56..dcbb52af 100644 --- a/src/parse_stream.jl +++ b/src/parse_stream.jl @@ -1021,7 +1021,7 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream; while true last_token = j <= lastindex(ranges) ? ranges[j].last_token : lastindex(tokens) - # Process tokens to nodes for all tokens used by the next internal node + # Process tokens to leaf nodes for all tokens used by the next internal node while i <= last_token t = tokens[i] if kind(t) == K"TOMBSTONE" @@ -1031,9 +1031,7 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream; srcrange = (stream.tokens[i-1].next_byte: stream.tokens[i].next_byte - 1) h = head(t) - children = (is_syntax_kind(h) || is_keyword(h)) ? - (stack[n].node for n=1:0) : nothing - node = make_node(h, srcrange, children) + node = make_node(h, srcrange, nothing) if !isnothing(node) push!(stack, (first_token=i, node=node)) end diff --git a/src/parser.jl b/src/parser.jl index 2f23ed1c..c246a332 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1955,7 +1955,8 @@ function parse_resword(ps::ParseState) elseif word in KSet"break continue" # break ==> (break) # continue ==> (continue) - bump(ps) + bump(ps, TRIVIA_FLAG) + emit(ps, mark, word) k = peek(ps) if !(k in KSet"NewlineWs ; ) : EndMarker" || (k == K"end" && !ps.end_symbol)) recover(is_closer_or_newline, ps, TRIVIA_FLAG, diff --git a/src/syntax_tree.jl b/src/syntax_tree.jl index a2df524d..02ef17f4 100644 --- a/src/syntax_tree.jl +++ b/src/syntax_tree.jl @@ -53,12 +53,6 @@ text by calling one of the parser API functions such as [`parseall`](@ref) """ const SyntaxNode = TreeNode{SyntaxData} -# Value of an error node with no children -struct ErrorVal -end - -Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) - function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}; keep_parens=false, position::Integer=1) GC.@preserve source begin @@ 
-71,7 +65,7 @@ end function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int, raw::GreenNode{SyntaxHead}, position::Int, keep_parens::Bool) - if !haschildren(raw) && !(is_syntax_kind(raw) || is_keyword(raw)) + if !haschildren(raw) # Here we parse the values eagerly rather than representing them as # strings. Maybe this is good. Maybe not. valrange = position:position + span(raw) - 1 diff --git a/test/expr.jl b/test/expr.jl index a349a3a1..1a943044 100644 --- a/test/expr.jl +++ b/test/expr.jl @@ -743,7 +743,11 @@ @test parsestmt("(x", ignore_errors=true) == Expr(:block, :x, Expr(:error)) @test parsestmt("x do", ignore_errors=true) == - Expr(:block, :x, Expr(:error, Expr(:do_lambda))) + Expr(:block, :x, Expr(:error, :do)) + @test parsestmt("x var\"y\"", ignore_errors=true) == + Expr(:block, :x, Expr(:error, :var, ErrorVal(), "y", ErrorVal())) + @test parsestmt("var\"y", ignore_errors=true) == + Expr(:var, :y, Expr(:error)) end @testset "import" begin diff --git a/test/parser.jl b/test/parser.jl index 6db8c08b..8f85bbeb 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -491,7 +491,7 @@ tests = [ ((v=v"1.8",), "struct A const a end") => "(struct A (block (const a)))" ((v=v"1.7",), "struct A const a end") => "(struct A (block (error (const a))))" "struct A end" => "(struct A (block))" - "struct try end" => "(struct (error (try)) (block))" + "struct try end" => "(struct (error try) (block))" # return "return\nx" => "(return)" "return)" => "(return)" @@ -503,7 +503,7 @@ tests = [ # module/baremodule "module A end" => "(module A (block))" "baremodule A end" => "(module-bare A (block))" - "module do \n end" => "(module (error (do)) (block))" + "module do \n end" => "(module (error do) (block))" "module \$A end" => "(module (\$ A) (block))" "module A \n a \n b \n end" => "(module A (block a b))" """module A \n "x"\na\n end""" => """(module A (block (doc (string "x") a)))""" diff --git a/test/test_utils.jl b/test/test_utils.jl index c8d87ac5..e85621f1 
100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -19,6 +19,7 @@ using .JuliaSyntax: # Nodes GreenNode, SyntaxNode, + ErrorVal, # Node inspection kind, flags,