Skip to content

Commit 1b048aa

Browse files
authored
Always emit() nodes with zero children, don't bump() them (#371)
Clean up weird cases where the parser would `bump()` interior nodes into the output stream rather than `emit()`ing them. Emitting all interior nodes explicitly means we can remove some special cases which occurred during tree building. As part of this, fix some errors when converting broken expressions like `x var"y"` from `SyntaxNode` to `Expr`.
1 parent 96137ed commit 1b048aa

File tree

8 files changed

+32
-23
lines changed

8 files changed

+32
-23
lines changed

src/expr.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,10 @@ end
196196
# Convert internal node of the JuliaSyntax parse tree to an Expr
197197
function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args)
198198
k = kind(head)
199-
if k == K"var" || k == K"char"
200-
@check length(args) == 1
199+
if (k == K"var" || k == K"char") && length(args) == 1
200+
# Ideally we'd like `@check length(args) == 1` as an invariant for all
201+
# K"var" and K"char" nodes, but this discounts having embedded error
202+
# nodes when ignore_errors=true is set.
201203
return args[1]
202204
elseif k == K"string" || k == K"cmdstring"
203205
return _string_to_Expr(k, args)

src/literal_parsing.jl

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
"""
2+
Nontrivia tokens (leaf nodes / literals) which are malformed are parsed into
3+
ErrorVal when `ignore_errors=true` during parsing.
4+
"""
5+
struct ErrorVal
6+
end
7+
8+
Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red)
9+
110
#-------------------------------------------------------------------------------
211
# This file contains utility functions for converting undecorated source
312
# strings into Julia values. For example, string->number, string unescaping, etc.
@@ -364,7 +373,6 @@ end
364373

365374
#-------------------------------------------------------------------------------
366375
function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
367-
# Leaf node
368376
k = kind(head)
369377
# Any errors parsing literals are represented as ErrorVal() - this can
370378
# happen when the user sets `ignore_errors=true` during parsing.
@@ -404,6 +412,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
404412
return false
405413
end
406414

415+
# TODO: Avoid allocating temporary String here
407416
val_str = String(txtbuf[srcrange])
408417
if k == K"Integer"
409418
parse_int_literal(val_str)
@@ -417,9 +426,6 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
417426
else
418427
Symbol(normalize_identifier(val_str))
419428
end
420-
elseif is_keyword(k)
421-
# This should only happen for tokens nested inside errors
422-
Symbol(val_str)
423429
elseif is_operator(k)
424430
isempty(srcrange) ?
425431
Symbol(untokenize(k)) : # synthetic invisible tokens
@@ -436,9 +442,12 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
436442
Symbol("core_@cmd")
437443
elseif is_syntax_kind(head)
438444
nothing
445+
elseif is_keyword(k)
446+
# This should only happen for tokens nested inside errors
447+
Symbol(val_str)
439448
else
440-
# FIXME: this allows us to recover from trivia is_error nodes
441-
# that we insert below
449+
# Other kinds should only happen for tokens nested inside errors
450+
# TODO: Consolidate this with the is_keyword() above? Something else?
442451
ErrorVal()
443452
end
444453
end

src/parse_stream.jl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,7 +1021,7 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream;
10211021
while true
10221022
last_token = j <= lastindex(ranges) ?
10231023
ranges[j].last_token : lastindex(tokens)
1024-
# Process tokens to nodes for all tokens used by the next internal node
1024+
# Process tokens to leaf nodes for all tokens used by the next internal node
10251025
while i <= last_token
10261026
t = tokens[i]
10271027
if kind(t) == K"TOMBSTONE"
@@ -1031,9 +1031,7 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream;
10311031
srcrange = (stream.tokens[i-1].next_byte:
10321032
stream.tokens[i].next_byte - 1)
10331033
h = head(t)
1034-
children = (is_syntax_kind(h) || is_keyword(h)) ?
1035-
(stack[n].node for n=1:0) : nothing
1036-
node = make_node(h, srcrange, children)
1034+
node = make_node(h, srcrange, nothing)
10371035
if !isnothing(node)
10381036
push!(stack, (first_token=i, node=node))
10391037
end

src/parser.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1955,7 +1955,8 @@ function parse_resword(ps::ParseState)
19551955
elseif word in KSet"break continue"
19561956
# break ==> (break)
19571957
# continue ==> (continue)
1958-
bump(ps)
1958+
bump(ps, TRIVIA_FLAG)
1959+
emit(ps, mark, word)
19591960
k = peek(ps)
19601961
if !(k in KSet"NewlineWs ; ) : EndMarker" || (k == K"end" && !ps.end_symbol))
19611962
recover(is_closer_or_newline, ps, TRIVIA_FLAG,

src/syntax_tree.jl

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,6 @@ text by calling one of the parser API functions such as [`parseall`](@ref)
5353
"""
5454
const SyntaxNode = TreeNode{SyntaxData}
5555

56-
# Value of an error node with no children
57-
struct ErrorVal
58-
end
59-
60-
Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red)
61-
6256
function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead};
6357
keep_parens=false, position::Integer=1)
6458
GC.@preserve source begin
@@ -71,7 +65,7 @@ end
7165
function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int,
7266
raw::GreenNode{SyntaxHead},
7367
position::Int, keep_parens::Bool)
74-
if !haschildren(raw) && !(is_syntax_kind(raw) || is_keyword(raw))
68+
if !haschildren(raw)
7569
# Here we parse the values eagerly rather than representing them as
7670
# strings. Maybe this is good. Maybe not.
7771
valrange = position:position + span(raw) - 1

test/expr.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,11 @@
743743
@test parsestmt("(x", ignore_errors=true) ==
744744
Expr(:block, :x, Expr(:error))
745745
@test parsestmt("x do", ignore_errors=true) ==
746-
Expr(:block, :x, Expr(:error, Expr(:do_lambda)))
746+
Expr(:block, :x, Expr(:error, :do))
747+
@test parsestmt("x var\"y\"", ignore_errors=true) ==
748+
Expr(:block, :x, Expr(:error, :var, ErrorVal(), "y", ErrorVal()))
749+
@test parsestmt("var\"y", ignore_errors=true) ==
750+
Expr(:var, :y, Expr(:error))
747751
end
748752

749753
@testset "import" begin

test/parser.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ tests = [
491491
((v=v"1.8",), "struct A const a end") => "(struct A (block (const a)))"
492492
((v=v"1.7",), "struct A const a end") => "(struct A (block (error (const a))))"
493493
"struct A end" => "(struct A (block))"
494-
"struct try end" => "(struct (error (try)) (block))"
494+
"struct try end" => "(struct (error try) (block))"
495495
# return
496496
"return\nx" => "(return)"
497497
"return)" => "(return)"
@@ -503,7 +503,7 @@ tests = [
503503
# module/baremodule
504504
"module A end" => "(module A (block))"
505505
"baremodule A end" => "(module-bare A (block))"
506-
"module do \n end" => "(module (error (do)) (block))"
506+
"module do \n end" => "(module (error do) (block))"
507507
"module \$A end" => "(module (\$ A) (block))"
508508
"module A \n a \n b \n end" => "(module A (block a b))"
509509
"""module A \n "x"\na\n end""" => """(module A (block (doc (string "x") a)))"""

test/test_utils.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ using .JuliaSyntax:
1919
# Nodes
2020
GreenNode,
2121
SyntaxNode,
22+
ErrorVal,
2223
# Node inspection
2324
kind,
2425
flags,

0 commit comments

Comments
 (0)