Skip to content

Commit 1000f00

Browse files
committed
Fix dstr unparsing
* This is an entirely new approach. * Instead to find the "correct" dstr segments we simply try all and unparse the first one that round trips. * This so far guarantees we always get good concrete syntax, but it can be time intensive as the combinatoric space of possible dynamic string sequence is quadratic with the dstr children size. * For this reason we try above (currently) dstr children to unparse as heredoc first. * Passes the entire corpus and fixes bugs. [fix #249]
1 parent 875dda1 commit 1000f00

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+1508
-825
lines changed

Changelog.md

+7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# v0.7.0 2024-09-16
2+
3+
[#366](https://github.com/mbj/unparser/pull/366)
4+
5+
* Fix all known dstring issues.
6+
* Interface changes.
7+
18
# v0.6.15 2024-06-10
29

310
[#373](https://github.com/mbj/unparser/pull/373)

Gemfile

+2
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,6 @@
22

33
source 'https://rubygems.org'
44

5+
gem 'mutant', path: '../mutant'
6+
57
gemspec

Gemfile.lock

+14-9
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
1+
PATH
2+
remote: ../mutant
3+
specs:
4+
mutant (0.12.4)
5+
diff-lcs (~> 1.3)
6+
parser (~> 3.3.0)
7+
regexp_parser (~> 2.9.0)
8+
sorbet-runtime (~> 0.5.0)
9+
unparser (~> 0.7.0)
10+
111
PATH
212
remote: .
313
specs:
4-
unparser (0.6.15)
14+
unparser (0.7.0)
515
diff-lcs (~> 1.6)
616
parser (>= 3.3.0)
717

@@ -12,12 +22,6 @@ GEM
1222
diff-lcs (1.6.0)
1323
json (2.10.1)
1424
language_server-protocol (3.17.0.4)
15-
mutant (0.12.4)
16-
diff-lcs (~> 1.3)
17-
parser (~> 3.3.0)
18-
regexp_parser (~> 2.9.0)
19-
sorbet-runtime (~> 0.5.0)
20-
unparser (~> 0.6.14)
2125
mutant-rspec (0.12.4)
2226
mutant (= 0.12.4)
2327
rspec-core (>= 3.8.0, < 4.0.0)
@@ -66,9 +70,10 @@ GEM
6670

6771
PLATFORMS
6872
ruby
73+
x86_64-linux
6974

7075
DEPENDENCIES
71-
mutant (~> 0.12.4)
76+
mutant!
7277
mutant-rspec (~> 0.12.4)
7378
rspec (~> 3.13)
7479
rspec-core (~> 3.13)
@@ -78,4 +83,4 @@ DEPENDENCIES
7883
unparser!
7984

8085
BUNDLED WITH
81-
2.5.10
86+
2.5.22

bin/corpus

+92-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
#!/usr/bin/env ruby
22
# frozen_string_literal: true
33

4+
require 'etc'
45
require 'mutant'
56
require 'optparse'
7+
require 'pathname'
68
require 'unparser'
79

10+
Thread.abort_on_exception = true
11+
812
module Unparser
913
module Corpus
1014
ROOT = Pathname.new(__dir__).parent
@@ -17,16 +21,85 @@ module Unparser
1721
#
1822
# @return [Boolean]
1923
def verify
24+
puts("Verifiying: #{name}")
2025
checkout
21-
command = %W[unparser #{repo_path}]
22-
exclude.each do |name|
23-
command.push('--ignore', repo_path.join(name).to_s)
26+
27+
paths = Pathname.glob(Pathname.new(repo_path).join('**/*.rb'))
28+
29+
driver = Mutant::Parallel.async(
30+
config: Mutant::Parallel::Config.new(
31+
block: method(:verify_path),
32+
jobs: Etc.nprocessors,
33+
on_process_start: ->(*) {},
34+
process_name: 'unparser-corpus-test',
35+
sink: Sink.new,
36+
source: Mutant::Parallel::Source::Array.new(jobs: paths),
37+
thread_name: 'unparser-corpus-test',
38+
timeout: nil
39+
),
40+
world: Mutant::WORLD
41+
)
42+
43+
loop do
44+
status = driver.wait_timeout(1)
45+
46+
puts("Processed: #{status.payload.total}")
47+
48+
status.payload.errors.each do |report|
49+
puts report
50+
fail
51+
end
52+
53+
break if status.done?
2454
end
25-
Kernel.system(*command)
55+
56+
true
2657
end
2758

2859
private
2960

61+
class Sink
62+
include Mutant::Parallel::Sink
63+
64+
attr_reader :errors, :total
65+
66+
def initialize
67+
@errors = []
68+
@total = 0
69+
end
70+
71+
def stop?
72+
!@errors.empty?
73+
end
74+
75+
def status
76+
self
77+
end
78+
79+
def response(response)
80+
if response.error
81+
Mutant::WORLD.stderr.puts(response.log)
82+
fail response.error
83+
end
84+
85+
@total += 1
86+
87+
if response.result
88+
@errors << response.result
89+
end
90+
end
91+
end
92+
93+
def verify_path(path)
94+
validation = Validation.from_path(path)
95+
96+
if original_syntax_error?(validation) || generated_encoding_error?(validation) || validation.success?
97+
return
98+
end
99+
100+
validation.report
101+
end
102+
30103
def checkout
31104
TMP.mkdir unless TMP.directory?
32105

@@ -50,6 +123,21 @@ module Unparser
50123
TMP.join(name)
51124
end
52125

126+
private
127+
128+
# This happens if the original source contained a non UTF charset meta comment.
129+
# These are not exposed to the AST in a way unparser could know about to generate a non UTF-8
130+
# target and emit that meta comment itself.
131+
# For the purpose of corpus testing these cases are ignored.
132+
def generated_encoding_error?(validation)
133+
exception = validation.generated_node.from_left { return false }
134+
exception.instance_of?(Parser::SyntaxError) && exception.message.eql?('literal contains escape sequences incompatible with UTF-8')
135+
end
136+
137+
def original_syntax_error?(validation)
138+
validation.original_node.from_left { return false }.instance_of?(Parser::SyntaxError)
139+
end
140+
53141
def system(arguments)
54142
return if Kernel.system(*arguments)
55143

bin/parser-round-trip-test

+33-8
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,14 @@ class Test
4040
:rubies
4141
)
4242

43-
EXPECT_FAILURE = {}.freeze
43+
EXPECT_FAILURE = {}.freeze
44+
STATIC_LOCAL_VARIABLES = %w[foo bar baz].to_set.freeze
45+
46+
NO_ROUND_TRIP = %i[
47+
test_int___LINE__
48+
test_pattern_matching__FILE__LINE_literals
49+
test_string___FILE__
50+
].freeze
4451

4552
def legacy_attributes
4653
default_builder_attributes.reject do |attribute_name, value|
@@ -56,6 +63,8 @@ class Test
5663
"Non targeted rubies: #{rubies.join(',')}"
5764
elsif validation.original_node.left?
5865
'Test specifies a syntax error'
66+
elsif NO_ROUND_TRIP.include?(name)
67+
'Test not round trippable'
5968
end
6069
end
6170

@@ -77,20 +86,25 @@ class Test
7786

7887
# rubocop:disable Metrics/AbcSize
7988
def validation
80-
identification = name.to_s
89+
identification = name.to_s
90+
91+
ast = Unparser::AST.new(
92+
comments: [],
93+
explicit_encoding: nil,
94+
node: node,
95+
static_local_variables: STATIC_LOCAL_VARIABLES
96+
)
8197

82-
generated_source = Unparser.unparse_either(node)
98+
generated_source = Unparser.unparse_ast_either(ast)
8399
.fmap { |string| string.dup.force_encoding(parser_source.encoding).freeze }
84100

85-
generated_node = generated_source.bind do |source|
86-
parse_either(source, identification)
87-
end
101+
generated_node = generated_source.bind { |source| parse_either(source, identification) }
88102

89103
Unparser::Validation.new(
90104
generated_node: generated_node,
91105
generated_source: generated_source,
92106
identification: identification,
93-
original_node: parse_either(parser_source, identification).fmap { node },
107+
original_ast: parse_either_ast(parser_source, identification),
94108
original_source: right(parser_source)
95109
)
96110
end
@@ -99,7 +113,7 @@ class Test
99113

100114
def parser
101115
Unparser.parser.tap do |parser|
102-
%w[foo bar baz].each(&parser.static_env.method(:declare))
116+
STATIC_LOCAL_VARIABLES.each(&parser.static_env.method(:declare))
103117
end
104118
end
105119

@@ -108,6 +122,17 @@ class Test
108122
parser.parse(Unparser.buffer(source, identification))
109123
end
110124
end
125+
126+
def parse_either_ast(source, identification)
127+
parse_either(source, identification).fmap do |node|
128+
Unparser::AST.new(
129+
comments: [],
130+
explicit_encoding: nil,
131+
node: node,
132+
static_local_variables: Set.new
133+
)
134+
end
135+
end
111136
end
112137

113138
class Execution

0 commit comments

Comments
 (0)