Skip to content

Commit 01742e6

Browse files
Upstreaming several changes that will be used by the GoogleSQL OSS project.
1 parent 95923b1 commit 01742e6

18 files changed

Lines changed: 608 additions & 111 deletions

File tree

cmd/textmapper/debug.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ Flags:`,
2222
}
2323

2424
var (
25-
stats = debugCmd.Flags.Bool("stats", false, "output generated table statistics")
26-
tables = debugCmd.Flags.Bool("tables", false, "dump generated tables in a human-readable format")
25+
stats = debugCmd.Flags.Bool("stats", false, "output generated table statistics")
26+
tables = debugCmd.Flags.Bool("tables", false, "dump generated tables in a human-readable format")
27+
conflicts = debugCmd.Flags.Bool("conflicts", false, "dump all conflicts in a human-readable format")
2728
)
2829

2930
func init() {
@@ -60,7 +61,7 @@ func debugFile(ctx context.Context, path string) error {
6061
}
6162

6263
start := time.Now()
63-
params := compiler.Params{CollectStats: true, DebugTables: *tables, CheckOnly: true /*disables optimizations*/}
64+
params := compiler.Params{CollectStats: true, DebugTables: *tables, DebugConflicts: *conflicts, CheckOnly: true /*disables optimizations*/}
6465
g, err := compiler.Compile(ctx, path, string(content), params)
6566
if g == nil {
6667
return err

compiler/compiler.go

Lines changed: 83 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@ import (
2121

2222
// Params control the grammar compilation process.
2323
type Params struct {
24-
CheckOnly bool // set to true, if the caller is interested in compilation errors only
25-
Verbose bool // set to true for more verbose errors
26-
DebugTables bool // set to true to get generated tables with embedded debug info
27-
CollectStats bool // set to true to collect more statistics about the LALR algorithm execution
24+
CheckOnly bool // set to true if the caller is interested in compilation errors only
25+
Verbose bool // set to true for more verbose errors
26+
DebugTables bool // set to true to get generated tables with embedded debug info
27+
DebugConflicts bool // set to true to include conflicts in DebugInfo
28+
CollectStats bool // set to true to collect more statistics about the LALR algorithm execution
2829
}
2930

3031
// Compile validates and compiles grammar files.
@@ -369,13 +370,14 @@ func (c *compiler) compileParser(file ast.File) {
369370
expectSR: loader.expectSR,
370371
expectRR: loader.expectRR,
371372
lalrOpts: lalr.Options{
372-
Lookahead: lookahead,
373-
Optimize: c.out.Options.OptimizeTables && !c.params.CheckOnly,
374-
MinimizeDFA: c.out.Options.MinimizeDFA,
375-
DefaultReduce: c.out.Options.DefaultReduce,
376-
Debug: c.params.DebugTables,
377-
CollectStats: c.params.CollectStats,
378-
Verbose: c.params.Verbose,
373+
Lookahead: lookahead,
374+
Optimize: c.out.Options.OptimizeTables && !c.params.CheckOnly,
375+
MinimizeDFA: c.out.Options.MinimizeDFA,
376+
DefaultReduce: c.out.Options.DefaultReduce,
377+
Debug: c.params.DebugTables,
378+
DebugConflicts: c.params.DebugConflicts,
379+
CollectStats: c.params.CollectStats,
380+
Verbose: c.params.Verbose,
379381
},
380382
}
381383
if err := generateTables(source, c.out, opts, file); err != nil {
@@ -586,6 +588,25 @@ func generateTables(source *syntax.Model, out *grammar.Grammar, opts genOptions,
586588
NtName: nt.Name,
587589
}
588590
if args != nil {
591+
// Note: here we build a union of "missing" aliases across all expansions of a rule.
592+
// All expanded rules share the same original args pointer. We can safely accumulate
593+
// missing aliases across all rule expansions right into args.MayBeMissing.
594+
if args.MayBeMissing == nil {
595+
args.MayBeMissing = make(map[string]bool)
596+
}
597+
for name, positions := range args.Names {
598+
missing := true
599+
for _, p := range positions {
600+
if _, ok := actualPos[p]; ok {
601+
missing = false
602+
break
603+
}
604+
}
605+
if missing {
606+
args.MayBeMissing[name] = true
607+
}
608+
}
609+
589610
act.Vars = &grammar.ActionVars{CmdArgs: *args, Remap: actualPos}
590611
for _, r := range rule.RHS {
591612
if !r.IsStateMarker() {
@@ -613,6 +634,57 @@ func generateTables(source *syntax.Model, out *grammar.Grammar, opts genOptions,
613634
parser.NumTerminals = len(source.Terminals)
614635
midrule.finalize(out, g)
615636

637+
// Assign action ids to rules that will get a default semantic action with a cast. When there is
638+
// no user-provided semantic action, Textmapper generates a default semantic action that forwards
639+
// the first RHS symbol's value. When the type of the first RHS symbol does not match the return
640+
// type, a cast is generated. Assigning action ids prevents states associated with different cast
641+
// behaviors from being merged in the DFA minimization step.
642+
type castKey struct {
643+
lhs string
644+
rhs string
645+
}
646+
castActions := make(map[castKey]int)
647+
for i, r := range parser.Rules {
648+
if r.Action != 0 {
649+
continue // Skip rules with user-provided semantic actions.
650+
}
651+
652+
lhsType := out.Syms[r.LHS].Type
653+
if lhsType == "" || len(r.RHS) == 0 {
654+
continue // Skip rules that do not require a cast.
655+
}
656+
657+
var rhs0Type string
658+
for _, s := range r.RHS {
659+
if s.IsStateMarker() {
660+
continue
661+
}
662+
idx := int(s)
663+
if idx < len(out.Syms) {
664+
rhs0Type = out.Syms[idx].Type
665+
}
666+
break
667+
}
668+
669+
if rhs0Type == "" || lhsType == rhs0Type {
670+
continue // Skip rules that do not require a cast.
671+
}
672+
673+
// Assign a distinct action for each behavior signature.
674+
sig := castKey{lhsType, rhs0Type}
675+
actionID, ok := castActions[sig]
676+
if !ok {
677+
actionID = len(parser.Actions)
678+
castActions[sig] = actionID
679+
parser.Actions = append(parser.Actions, grammar.SemanticAction{
680+
NtName: out.Syms[r.LHS].Name,
681+
Origin: out.Syms[r.LHS].Origin,
682+
})
683+
}
684+
r.Action = actionID
685+
g.Rules[i].Action = actionID
686+
}
687+
616688
tables, err := lalr.Compile(g, opts.lalrOpts)
617689
parser.Tables = tables
618690
return err

compiler/compiler_test.go

Lines changed: 107 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,13 +156,18 @@ func writeNonterm(nt *syntax.Nonterm, b *strings.Builder) {
156156
}
157157
}
158158

159-
var debugFiles = []string{
160-
"debug.tm",
159+
var debugFiles = []struct {
160+
file string
161+
params Params
162+
}{
163+
{file: "debug.tm", params: Params{DebugTables: true}},
164+
{file: "debug_conflicts.tm", params: Params{DebugConflicts: true}},
161165
}
162166

163167
func TestDebugInfo(t *testing.T) {
164168
ctx := context.Background()
165-
for _, file := range debugFiles {
169+
for _, tc := range debugFiles {
170+
file, params := tc.file, tc.params
166171
content, err := os.ReadFile(filepath.Join("testdata", file))
167172
if err != nil {
168173
t.Errorf("cannot read %v: %v", file, err)
@@ -175,7 +180,7 @@ func TestDebugInfo(t *testing.T) {
175180
continue
176181
}
177182

178-
g, err := Compile(ctx, file, string(content), Params{DebugTables: true})
183+
g, err := Compile(ctx, file, string(content), params)
179184
if err != nil {
180185
t.Errorf("%v: compilation failed with %v", file, err)
181186
continue
@@ -185,14 +190,19 @@ func TestDebugInfo(t *testing.T) {
185190
var b strings.Builder
186191
b.WriteString("\n\n")
187192
for _, info := range g.Parser.Tables.DebugInfo {
193+
if params.DebugTables {
194+
info = strings.ReplaceAll(info, "•", "_")
195+
}
188196
b.WriteString(info)
189197
b.WriteByte('\n')
190198
}
191199
got := b.String()
192200

193201
if diff := diff.LineDiff(want, got); diff != "" {
194202
t.Errorf("The in-file debug info does not match the produced one.\n--- %v\n+++ %v (produced)\n%v", file, file, diff)
195-
t.Logf("Run (cd compiler/testdata; go run ../../cmd/textmapper/*.go debug --tables %v >> %v) to regenerate.", file, file)
203+
if params.DebugTables {
204+
t.Logf("Run (cd compiler/testdata; go run ../../cmd/textmapper/*.go debug --tables %v >> %v) to regenerate.", file, file)
205+
}
196206
}
197207
}
198208
}
@@ -731,6 +741,98 @@ S2 : a ;
731741
State 4 -> State 9 on symbol 'lookahead_S2'
732742
State 1 -> State 11 on symbol 'S1'
733743
State 2 -> State 11 on symbol 'S2'
744+
`,
745+
},
746+
{
747+
name: "castMerging",
748+
grammar: `
749+
language cast_merging(go);
750+
%v
751+
:: lexer
752+
'a': /a/
753+
'b': /b/
754+
'c': /c/
755+
'x' {int}: /x/
756+
'y' {float}: /y/
757+
758+
:: parser
759+
input:
760+
'a' shared 'c'
761+
| 'b' shared 'c'
762+
;
763+
764+
shared {float}:
765+
'x'
766+
| 'y'
767+
;
768+
`,
769+
wantOff: `Transitions for castMerging (minimize OFF) (States: 11):
770+
State 9 -> State 10 on symbol 'eoi'
771+
State 0 -> State 1 on symbol ''a''
772+
State 0 -> State 2 on symbol ''b''
773+
State 5 -> State 7 on symbol ''c''
774+
State 6 -> State 8 on symbol ''c''
775+
State 1 -> State 3 on symbol ''x''
776+
State 2 -> State 3 on symbol ''x''
777+
State 1 -> State 4 on symbol ''y''
778+
State 2 -> State 4 on symbol ''y''
779+
State 0 -> State 9 on symbol 'input'
780+
State 1 -> State 5 on symbol 'shared'
781+
State 2 -> State 6 on symbol 'shared'
782+
`,
783+
wantOn: `Transitions for castMerging (minimize ON) (States: 8):
784+
State 6 -> State 7 on symbol 'eoi'
785+
State 0 -> State 1 on symbol ''a''
786+
State 0 -> State 1 on symbol ''b''
787+
State 4 -> State 5 on symbol ''c''
788+
State 1 -> State 2 on symbol ''x''
789+
State 1 -> State 3 on symbol ''y''
790+
State 0 -> State 6 on symbol 'input'
791+
State 1 -> State 4 on symbol 'shared'
792+
`,
793+
},
794+
{
795+
name: "castMerging2",
796+
grammar: `
797+
language cast_merging2(go);
798+
%v
799+
:: lexer
800+
'a': /a/
801+
'b': /b/
802+
'c': /c/
803+
'x' {int}: /x/
804+
805+
:: parser
806+
input:
807+
'a' r1 'c'
808+
| 'b' r2 'c'
809+
;
810+
811+
r1 {float}: 'x' ; // cast int -> float
812+
r2 {float}: 'x' ; // same cast int -> float
813+
`,
814+
wantOff: `Transitions for castMerging2 (minimize OFF) (States: 11):
815+
State 9 -> State 10 on symbol 'eoi'
816+
State 0 -> State 1 on symbol ''a''
817+
State 0 -> State 2 on symbol ''b''
818+
State 4 -> State 7 on symbol ''c''
819+
State 6 -> State 8 on symbol ''c''
820+
State 1 -> State 3 on symbol ''x''
821+
State 2 -> State 5 on symbol ''x''
822+
State 0 -> State 9 on symbol 'input'
823+
State 1 -> State 4 on symbol 'r1'
824+
State 2 -> State 6 on symbol 'r2'
825+
`,
826+
wantOn: `Transitions for castMerging2 (minimize ON) (States: 9):
827+
State 7 -> State 8 on symbol 'eoi'
828+
State 0 -> State 1 on symbol ''a''
829+
State 0 -> State 2 on symbol ''b''
830+
State 4 -> State 6 on symbol ''c''
831+
State 1 -> State 3 on symbol ''x''
832+
State 2 -> State 5 on symbol ''x''
833+
State 0 -> State 7 on symbol 'input'
834+
State 1 -> State 4 on symbol 'r1'
835+
State 2 -> State 4 on symbol 'r2'
734836
`,
735837
},
736838
}

compiler/options.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ func newOptionsParser(s *status.Status) *optionsParser {
2525
AbslIncludePrefix: "absl",
2626
SkipByteOrderMark: true,
2727
OptInstantiationSuffix: "opt",
28+
AliasIncludesOptSuffix: true,
2829
ExpansionLimit: 65_536,
2930
ExpansionWarn: 256,
3031
MaxRuleSizeForOrdinalRef: 16,
@@ -78,6 +79,8 @@ func (p *optionsParser) parseFrom(file ast.File) {
7879
opts.GenParser = p.parseExpr(opt.Value(), opts.GenParser).(bool)
7980
case "optInstantiationSuffix":
8081
opts.OptInstantiationSuffix = p.parseExpr(opt.Value(), opts.OptInstantiationSuffix).(string)
82+
case "aliasIncludesOptSuffix":
83+
opts.AliasIncludesOptSuffix = p.parseExpr(opt.Value(), opts.AliasIncludesOptSuffix).(bool)
8184
case "cancellable":
8285
p.validLangs(opt.Key(), "go")
8386
opts.Cancellable = p.parseExpr(opt.Value(), opts.Cancellable).(bool)

0 commit comments

Comments
 (0)