From ffd82ef1dfb5373a07cd04845dc03d0059c8ae84 Mon Sep 17 00:00:00 2001 From: David Grant Date: Thu, 20 Jun 2024 20:44:18 -0700 Subject: [PATCH 1/7] Initial separator lexing. --- internal/parser/lexer.go | 50 ++++++++++++++++++++++++++++++++--- internal/parser/lexer_test.go | 22 ++++++++++++++- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/internal/parser/lexer.go b/internal/parser/lexer.go index 37434362..f26978f7 100644 --- a/internal/parser/lexer.go +++ b/internal/parser/lexer.go @@ -358,7 +358,21 @@ func (l *lexer) resetTokenStart() { l.tokenStartLoc = l.location() } +// tokenKindPostprocessors defines a transformation of the lexed token string +// before it is stored in the tokens list. It is optional for each token kind. +var tokenKindPostprocessors = map[tokenKind]func(string) string{ + tokenNumber: func(s string) string { + // Get rid of underscore digit separators. + return strings.ReplaceAll(s, "_", "") + }, +} + func (l *lexer) emitFullToken(kind tokenKind, data, stringBlockIndent, stringBlockTermIndent string) { + // Run the postprocessor if the token kind has one defined. + if pp, ok := tokenKindPostprocessors[kind]; ok { + data = pp(data) + } + l.tokens = append(l.tokens, token{ kind: kind, fodder: l.fodder, @@ -451,7 +465,7 @@ func (l *lexer) lexUntilNewline() (string, int, int) { // that the next rune to be served by the lexer will be a leading digit. func (l *lexer) lexNumber() error { // This function should be understood with reference to the linked image: - // http://www.json.org/number.gif + // https://www.json.org/img/number.png // Note, we deviate from the json.org documentation as follows: // There is no reason to lex negative numbers as atomic tokens, it is better to parse them @@ -465,9 +479,11 @@ func (l *lexer) lexNumber() error { numAfterOneToNine numAfterDot numAfterDigit + numAfterUnderscore numAfterE numAfterExpSign numAfterExpDigit + numAfterExpUnderscore ) state := numBegin @@ -492,6 +508,9 @@ outerLoop: state = numAfterDot case 'e', 'E': state = numAfterE + case '_': + state = numAfterUnderscore + default: break outerLoop } @@ -503,6 +522,8 @@ outerLoop: state = numAfterE case r >= '0' && r <= '9': state = numAfterOneToNine + case r == '_': + state = numAfterUnderscore default: break outerLoop } @@ -521,9 +542,28 @@ outerLoop: state = numAfterE case r >= '0' && r <= '9': state = numAfterDigit + case r == '_': + state = numAfterUnderscore default: break outerLoop } + + case numAfterUnderscore: + // The only valid transition out of _ is to a digit. + switch { + case r == '_': + return l.makeStaticErrorPoint( + "Couldn't lex number, multiple consecutive _'s", + l.location()) + + case r >= '0' && r <= '9': + state = numAfterExpDigit + + default: + return l.makeStaticErrorPoint( + fmt.Sprintf("Couldn't lex number, junk after '_': %v", strconv.QuoteRuneToASCII(r)), + l.location()) + } case numAfterE: switch { case r == '+' || r == '-': @@ -545,9 +585,12 @@ outerLoop: } case numAfterExpDigit: - if r >= '0' && r <= '9' { + switch { + case r >= '0' && r <= '9': state = numAfterExpDigit - } else { + case r == '_': + state = numAfterUnderscore + default: break outerLoop } } @@ -965,7 +1008,6 @@ func Lex(diagnosticFilename ast.DiagnosticFileName, importedFilename, input stri fmt.Sprintf("Could not lex the character %s", strconv.QuoteRuneToASCII(r)), l.location()) } - } } diff --git a/internal/parser/lexer_test.go b/internal/parser/lexer_test.go index c54ff0ec..e7f2bee1 100644 --- a/internal/parser/lexer_test.go +++ b/internal/parser/lexer_test.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -314,6 +314,26 @@ func TestNumber1epExc(t *testing.T) { SingleTest(t, "1e+!", "snippet:1:4 Couldn't lex number, junk after exponent sign: '!'", Tokens{}) } +func TestNumberSeparators(t *testing.T) { + + SingleTest(t, "123_456", "", Tokens{{kind: tokenNumber, data: "123456"}}) + + /* + testLex("number 123_456", "123_456", {Token(Token::Kind::NUMBER, "123456")}, ""); + testLex("number 1_750_000", "1_750_000", {Token(Token::Kind::NUMBER, "1750000")}, ""); + testLex("number 1_2_3", "1_2_3", {Token(Token::Kind::NUMBER, "123")}, ""); + testLex("number 3.141_592", "3.141_592", {Token(Token::Kind::NUMBER, "3.141592")}, ""); + testLex("number 01_100", "01_100", {Token(Token::Kind::NUMBER, "0"), Token(Token::Kind::NUMBER, "1100")}, ""); + testLex("number 1_200.0", "1_200.0", {Token(Token::Kind::NUMBER, "1200.0")}, ""); + testLex("number 0e1_01", "0e1_01", {Token(Token::Kind::NUMBER, "0e101")}, ""); + testLex("number 10_10e3", "10_10e3", {Token(Token::Kind::NUMBER, "1010e3")}, ""); + testLex("number 2_3e1_2", "2_3e1_2", {Token(Token::Kind::NUMBER, "23e12")}, ""); + testLex("number 1.1_2e100", "1.1_2e100", {Token(Token::Kind::NUMBER, "1.12e100")}, ""); + testLex("number 1.1e-10_1", "1.1e-10_1", {Token(Token::Kind::NUMBER, "1.1e-101")}, ""); + testLex("number 9.109_383_56e-31", "9.109_383_56e-31", {Token(Token::Kind::NUMBER, "9.10938356e-31")}, ""); + */ +} + func TestDoublestring1(t *testing.T) { SingleTest(t, "\"hi\"", "", Tokens{ {kind: tokenStringDouble, data: "hi"}, From 4559e0b5925036fab205fd1007583a7f1afeb472 Mon Sep 17 00:00:00 2001 From: David Grant Date: Thu, 20 Jun 2024 20:57:28 -0700 Subject: [PATCH 2/7] More tests. Some fail. --- internal/parser/lexer_test.go | 91 +++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 16 deletions(-) diff --git a/internal/parser/lexer_test.go b/internal/parser/lexer_test.go index e7f2bee1..6a65faea 100644 --- a/internal/parser/lexer_test.go +++ b/internal/parser/lexer_test.go @@ -16,6 +16,7 @@ limitations under the License. package parser import ( + "fmt" "testing" "github.com/google/go-jsonnet/ast" @@ -315,23 +316,81 @@ func TestNumber1epExc(t *testing.T) { } func TestNumberSeparators(t *testing.T) { + cases := [...]struct { + input string + err string + tokens Tokens + }{ + { + input: "123_456", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "123456"}}, + }, + { + input: "1_750_000", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "1750000"}}, + }, + { + input: "1_2_3", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "123"}}, + }, + { + input: "3.141_592", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "3.141592"}}, + }, + { + input: "01_100", + err: "", + tokens: Tokens{ + {kind: tokenNumber, data: "0"}, + {kind: tokenNumber, data: "1100"}, + }, + }, + { + input: "1_200.0", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "1200.0"}}, + }, + { + input: "0e1_01", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "0e101"}}, + }, + { + input: "10_10e3", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "1010e3"}}, + }, + { + input: "2_3e1_2", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "23e12"}}, + }, + { + input: "1.1_2e100", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "1.12e100"}}, + }, + { + input: "1.1e-10_1", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "1.1e-101"}}, + }, + { + input: "9.109_383_56e-31", + err: "", + tokens: Tokens{{kind: tokenNumber, data: "9.10938356e-31"}}, + }, + } - SingleTest(t, "123_456", "", Tokens{{kind: tokenNumber, data: "123456"}}) - - /* - testLex("number 123_456", "123_456", {Token(Token::Kind::NUMBER, "123456")}, ""); - testLex("number 1_750_000", "1_750_000", {Token(Token::Kind::NUMBER, "1750000")}, ""); - testLex("number 1_2_3", "1_2_3", {Token(Token::Kind::NUMBER, "123")}, ""); - testLex("number 3.141_592", "3.141_592", {Token(Token::Kind::NUMBER, "3.141592")}, ""); - testLex("number 01_100", "01_100", {Token(Token::Kind::NUMBER, "0"), Token(Token::Kind::NUMBER, "1100")}, ""); - testLex("number 1_200.0", "1_200.0", {Token(Token::Kind::NUMBER, "1200.0")}, ""); - testLex("number 0e1_01", "0e1_01", {Token(Token::Kind::NUMBER, "0e101")}, ""); - testLex("number 10_10e3", "10_10e3", {Token(Token::Kind::NUMBER, "1010e3")}, ""); - testLex("number 2_3e1_2", "2_3e1_2", {Token(Token::Kind::NUMBER, "23e12")}, ""); - testLex("number 1.1_2e100", "1.1_2e100", {Token(Token::Kind::NUMBER, "1.12e100")}, ""); - testLex("number 1.1e-10_1", "1.1e-10_1", {Token(Token::Kind::NUMBER, "1.1e-101")}, ""); - testLex("number 9.109_383_56e-31", "9.109_383_56e-31", {Token(Token::Kind::NUMBER, "9.10938356e-31")}, ""); - */ + for _, c := range cases { + t.Run(fmt.Sprintf("number %s", c.input), func(t *testing.T) { + SingleTest(t, c.input, c.err, c.tokens) + }) + } } func TestDoublestring1(t *testing.T) { From 18cf3bc68c3eb5dd49516e7ef3012e0d7335ecc3 Mon Sep 17 00:00:00 2001 From: David Grant Date: Thu, 20 Jun 2024 21:01:31 -0700 Subject: [PATCH 3/7] Fix the test. --- internal/parser/lexer.go | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/internal/parser/lexer.go b/internal/parser/lexer.go index f26978f7..4a8bfe4e 100644 --- a/internal/parser/lexer.go +++ b/internal/parser/lexer.go @@ -557,7 +557,7 @@ outerLoop: l.location()) case r >= '0' && r <= '9': - state = numAfterExpDigit + state = numAfterOneToNine default: return l.makeStaticErrorPoint( @@ -589,11 +589,29 @@ outerLoop: case r >= '0' && r <= '9': state = numAfterExpDigit case r == '_': - state = numAfterUnderscore + state = numAfterExpUnderscore default: break outerLoop } + + case numAfterExpUnderscore: + // The only valid transition out of _ is to a digit. + switch { + case r == '_': + return l.makeStaticErrorPoint( + "Couldn't lex number, multiple consecutive _'s", + l.location()) + + case r >= '0' && r <= '9': + state = numAfterExpDigit + + default: + return l.makeStaticErrorPoint( + fmt.Sprintf("Couldn't lex number, junk after '_': %v", strconv.QuoteRuneToASCII(r)), + l.location()) + } } + l.next() } From c984516edb66738bdf44cd32bf7694dc04809c7f Mon Sep 17 00:00:00 2001 From: David Grant Date: Fri, 21 Jun 2024 08:28:11 -0700 Subject: [PATCH 4/7] Add exceptional test cases. Make case table less crazy. --- internal/parser/lexer_test.go | 96 +++++++++++------------------------ 1 file changed, 29 insertions(+), 67 deletions(-) diff --git a/internal/parser/lexer_test.go b/internal/parser/lexer_test.go index 6a65faea..fc1793c8 100644 --- a/internal/parser/lexer_test.go +++ b/internal/parser/lexer_test.go @@ -316,77 +316,39 @@ func TestNumber1epExc(t *testing.T) { } func TestNumberSeparators(t *testing.T) { - cases := [...]struct { + type numcase struct { input string err string tokens Tokens - }{ - { - input: "123_456", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "123456"}}, - }, - { - input: "1_750_000", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "1750000"}}, - }, - { - input: "1_2_3", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "123"}}, - }, - { - input: "3.141_592", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "3.141592"}}, - }, - { - input: "01_100", - err: "", - tokens: Tokens{ - {kind: tokenNumber, data: "0"}, - {kind: tokenNumber, data: "1100"}, - }, - }, - { - input: "1_200.0", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "1200.0"}}, - }, - { - input: "0e1_01", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "0e101"}}, - }, - { - input: "10_10e3", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "1010e3"}}, - }, - { - input: "2_3e1_2", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "23e12"}}, - }, - { - input: "1.1_2e100", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "1.12e100"}}, - }, - { - input: "1.1e-10_1", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "1.1e-101"}}, - }, - { - input: "9.109_383_56e-31", - err: "", - tokens: Tokens{{kind: tokenNumber, data: "9.10938356e-31"}}, - }, } - - for _, c := range cases { + mknumcase := func(input string, err string, tokens Tokens) numcase { + return numcase{input, err, tokens} + } + for _, c := range [...]numcase{ + mknumcase("123_456", "", Tokens{{kind: tokenNumber, data: "123456"}}), + mknumcase("1_750_000", "", Tokens{{kind: tokenNumber, data: "1750000"}}), + mknumcase("1_2_3", "", Tokens{{kind: tokenNumber, data: "123"}}), + mknumcase("3.141_592", "", Tokens{{kind: tokenNumber, data: "3.141592"}}), + mknumcase("01_100", "", Tokens{ + {kind: tokenNumber, data: "0"}, + {kind: tokenNumber, data: "1100"}, + }), + mknumcase("1_200.0", "", Tokens{{kind: tokenNumber, data: "1200.0"}}), + mknumcase("0e1_01", "", Tokens{{kind: tokenNumber, data: "0e101"}}), + mknumcase("10_10e3", "", Tokens{{kind: tokenNumber, data: "1010e3"}}), + mknumcase("2_3e1_2", "", Tokens{{kind: tokenNumber, data: "23e12"}}), + mknumcase("1.1_2e100", "", Tokens{{kind: tokenNumber, data: "1.12e100"}}), + mknumcase("1.1e-10_1", "", Tokens{{kind: tokenNumber, data: "1.1e-101"}}), + mknumcase("9.109_383_56e-31", "", Tokens{{kind: tokenNumber, data: "9.10938356e-31"}}), + mknumcase("123456_!", "snippet:1:8 Couldn't lex number, junk after '_': '!'", Tokens{}), + mknumcase("123__456", "snippet:1:5 Couldn't lex number, multiple consecutive _'s", Tokens{}), + mknumcase("1_200_.0", "snippet:1:7 Couldn't lex number, junk after '_': '.'", Tokens{}), + mknumcase("1_200._0", "snippet:1:7 Couldn't lex number, junk after decimal point: '_'", Tokens{}), + mknumcase("1_200_e2", "snippet:1:7 Couldn't lex number, junk after '_': 'e'", Tokens{}), + mknumcase("1_200e_2", "snippet:1:7 Couldn't lex number, junk after 'E': '_'", Tokens{}), + mknumcase("200e-_2", "snippet:1:6 Couldn't lex number, junk after exponent sign: '_'", Tokens{}), + mknumcase("200e+_2", "snippet:1:6 Couldn't lex number, junk after exponent sign: '_'", Tokens{}), + } { t.Run(fmt.Sprintf("number %s", c.input), func(t *testing.T) { SingleTest(t, c.input, c.err, c.tokens) }) From 09df750de7fe8d3f7aa4e7cdbb4671d5faeed1df Mon Sep 17 00:00:00 2001 From: David Grant Date: Fri, 21 Jun 2024 08:35:02 -0700 Subject: [PATCH 5/7] Just use struct literals. --- internal/parser/lexer_test.go | 51 +++++++++++++++-------------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/internal/parser/lexer_test.go b/internal/parser/lexer_test.go index fc1793c8..2fbbf9fd 100644 --- a/internal/parser/lexer_test.go +++ b/internal/parser/lexer_test.go @@ -316,38 +316,31 @@ func TestNumber1epExc(t *testing.T) { } func TestNumberSeparators(t *testing.T) { - type numcase struct { + for _, c := range []struct { input string err string tokens Tokens - } - mknumcase := func(input string, err string, tokens Tokens) numcase { - return numcase{input, err, tokens} - } - for _, c := range [...]numcase{ - mknumcase("123_456", "", Tokens{{kind: tokenNumber, data: "123456"}}), - mknumcase("1_750_000", "", Tokens{{kind: tokenNumber, data: "1750000"}}), - mknumcase("1_2_3", "", Tokens{{kind: tokenNumber, data: "123"}}), - mknumcase("3.141_592", "", Tokens{{kind: tokenNumber, data: "3.141592"}}), - mknumcase("01_100", "", Tokens{ - {kind: tokenNumber, data: "0"}, - {kind: tokenNumber, data: "1100"}, - }), - mknumcase("1_200.0", "", Tokens{{kind: tokenNumber, data: "1200.0"}}), - mknumcase("0e1_01", "", Tokens{{kind: tokenNumber, data: "0e101"}}), - mknumcase("10_10e3", "", Tokens{{kind: tokenNumber, data: "1010e3"}}), - mknumcase("2_3e1_2", "", Tokens{{kind: tokenNumber, data: "23e12"}}), - mknumcase("1.1_2e100", "", Tokens{{kind: tokenNumber, data: "1.12e100"}}), - mknumcase("1.1e-10_1", "", Tokens{{kind: tokenNumber, data: "1.1e-101"}}), - mknumcase("9.109_383_56e-31", "", Tokens{{kind: tokenNumber, data: "9.10938356e-31"}}), - mknumcase("123456_!", "snippet:1:8 Couldn't lex number, junk after '_': '!'", Tokens{}), - mknumcase("123__456", "snippet:1:5 Couldn't lex number, multiple consecutive _'s", Tokens{}), - mknumcase("1_200_.0", "snippet:1:7 Couldn't lex number, junk after '_': '.'", Tokens{}), - mknumcase("1_200._0", "snippet:1:7 Couldn't lex number, junk after decimal point: '_'", Tokens{}), - mknumcase("1_200_e2", "snippet:1:7 Couldn't lex number, junk after '_': 'e'", Tokens{}), - mknumcase("1_200e_2", "snippet:1:7 Couldn't lex number, junk after 'E': '_'", Tokens{}), - mknumcase("200e-_2", "snippet:1:6 Couldn't lex number, junk after exponent sign: '_'", Tokens{}), - mknumcase("200e+_2", "snippet:1:6 Couldn't lex number, junk after exponent sign: '_'", Tokens{}), + }{ + {"123_456", "", Tokens{{kind: tokenNumber, data: "123456"}}}, + {"1_750_000", "", Tokens{{kind: tokenNumber, data: "1750000"}}}, + {"1_2_3", "", Tokens{{kind: tokenNumber, data: "123"}}}, + {"3.141_592", "", Tokens{{kind: tokenNumber, data: "3.141592"}}}, + {"01_100", "", Tokens{{kind: tokenNumber, data: "0"}, {kind: tokenNumber, data: "1100"}}}, + {"1_200.0", "", Tokens{{kind: tokenNumber, data: "1200.0"}}}, + {"0e1_01", "", Tokens{{kind: tokenNumber, data: "0e101"}}}, + {"10_10e3", "", Tokens{{kind: tokenNumber, data: "1010e3"}}}, + {"2_3e1_2", "", Tokens{{kind: tokenNumber, data: "23e12"}}}, + {"1.1_2e100", "", Tokens{{kind: tokenNumber, data: "1.12e100"}}}, + {"1.1e-10_1", "", Tokens{{kind: tokenNumber, data: "1.1e-101"}}}, + {"9.109_383_56e-31", "", Tokens{{kind: tokenNumber, data: "9.10938356e-31"}}}, + {"123456_!", "snippet:1:8 Couldn't lex number, junk after '_': '!'", Tokens{}}, + {"123__456", "snippet:1:5 Couldn't lex number, multiple consecutive _'s", Tokens{}}, + {"1_200_.0", "snippet:1:7 Couldn't lex number, junk after '_': '.'", Tokens{}}, + {"1_200._0", "snippet:1:7 Couldn't lex number, junk after decimal point: '_'", Tokens{}}, + {"1_200_e2", "snippet:1:7 Couldn't lex number, junk after '_': 'e'", Tokens{}}, + {"1_200e_2", "snippet:1:7 Couldn't lex number, junk after 'E': '_'", Tokens{}}, + {"200e-_2", "snippet:1:6 Couldn't lex number, junk after exponent sign: '_'", Tokens{}}, + {"200e+_2", "snippet:1:6 Couldn't lex number, junk after exponent sign: '_'", Tokens{}}, } { t.Run(fmt.Sprintf("number %s", c.input), func(t *testing.T) { SingleTest(t, c.input, c.err, c.tokens) From f4d4c5f75d9b27cae4b20815701b8dbf8efb7a2a Mon Sep 17 00:00:00 2001 From: David Grant Date: Fri, 21 Jun 2024 20:33:25 -0700 Subject: [PATCH 6/7] Add a test for _123 lexing as identifier. --- internal/parser/lexer_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/parser/lexer_test.go b/internal/parser/lexer_test.go index 2fbbf9fd..4b7ae444 100644 --- a/internal/parser/lexer_test.go +++ b/internal/parser/lexer_test.go @@ -525,6 +525,12 @@ func TestIdentifiers(t *testing.T) { }) } +func TestIdentifierUnderscore(t *testing.T) { + SingleTest(t, "_123", "", Tokens{ + {kind: tokenIdentifier, data: "_123"}, + }) +} + func TestCppComment(t *testing.T) { SingleTest(t, "// hi", "", Tokens{ {kind: tokenEndOfFile, fodder: ast.Fodder{{Kind: ast.FodderParagraph, Comment: []string{"// hi"}}}}, From f10caa02447fc763dd995e4e9354a409a9892f8f Mon Sep 17 00:00:00 2001 From: David Grant Date: Sun, 23 Jun 2024 14:59:24 -0700 Subject: [PATCH 7/7] Simpler to not special-case consecutive _s. --- internal/parser/lexer.go | 12 ------------ internal/parser/lexer_test.go | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/internal/parser/lexer.go b/internal/parser/lexer.go index 4a8bfe4e..a32a663c 100644 --- a/internal/parser/lexer.go +++ b/internal/parser/lexer.go @@ -551,14 +551,8 @@ outerLoop: case numAfterUnderscore: // The only valid transition out of _ is to a digit. switch { - case r == '_': - return l.makeStaticErrorPoint( - "Couldn't lex number, multiple consecutive _'s", - l.location()) - case r >= '0' && r <= '9': state = numAfterOneToNine - default: return l.makeStaticErrorPoint( fmt.Sprintf("Couldn't lex number, junk after '_': %v", strconv.QuoteRuneToASCII(r)), @@ -597,14 +591,8 @@ outerLoop: case numAfterExpUnderscore: // The only valid transition out of _ is to a digit. switch { - case r == '_': - return l.makeStaticErrorPoint( - "Couldn't lex number, multiple consecutive _'s", - l.location()) - case r >= '0' && r <= '9': state = numAfterExpDigit - default: return l.makeStaticErrorPoint( fmt.Sprintf("Couldn't lex number, junk after '_': %v", strconv.QuoteRuneToASCII(r)), diff --git a/internal/parser/lexer_test.go b/internal/parser/lexer_test.go index 4b7ae444..8d92f0c0 100644 --- a/internal/parser/lexer_test.go +++ b/internal/parser/lexer_test.go @@ -334,7 +334,7 @@ func TestNumberSeparators(t *testing.T) { {"1.1e-10_1", "", Tokens{{kind: tokenNumber, data: "1.1e-101"}}}, {"9.109_383_56e-31", "", Tokens{{kind: tokenNumber, data: "9.10938356e-31"}}}, {"123456_!", "snippet:1:8 Couldn't lex number, junk after '_': '!'", Tokens{}}, - {"123__456", "snippet:1:5 Couldn't lex number, multiple consecutive _'s", Tokens{}}, + {"123__456", "snippet:1:5 Couldn't lex number, junk after '_': '_'", Tokens{}}, {"1_200_.0", "snippet:1:7 Couldn't lex number, junk after '_': '.'", Tokens{}}, {"1_200._0", "snippet:1:7 Couldn't lex number, junk after decimal point: '_'", Tokens{}}, {"1_200_e2", "snippet:1:7 Couldn't lex number, junk after '_': 'e'", Tokens{}},