diff --git a/pkg/lexer/lexer.go b/pkg/lexer/lexer.go
index 71a737f4..e7c0038a 100644
--- a/pkg/lexer/lexer.go
+++ b/pkg/lexer/lexer.go
@@ -442,6 +442,27 @@ func (l *Lexer) readNumber() (string, tokenizer.TokenType) {
 				l.readChar()
 			}
 			return l.input[position:l.position], tokenizer.INT
+		} else if next == 'o' || next == 'O' {
+			// Octal literal (explicit 0o prefix)
+			l.readChar() // consume '0'
+			l.readChar() // consume 'o'
+			if !isOctalDigit(l.ch) {
+				l.addError("E1010", "invalid octal literal: expected octal digit (0-7) after 0o", startLine, startColumn)
+				return l.input[position:l.position], tokenizer.INT
+			}
+			for isOctalDigit(l.ch) || l.ch == '_' {
+				if l.ch == '_' {
+					next := l.peekChar()
+					if next == '_' {
+						l.addError("E1011", "consecutive underscores not allowed in numeric literals", startLine, startColumn)
+					} else if !isOctalDigit(next) {
+						// Only a '_' followed by neither a digit nor another '_' is trailing.
+						l.addError("E1013", "numeric literal cannot end with underscore", startLine, startColumn)
+					}
+				}
+				l.readChar()
+			}
+			return l.input[position:l.position], tokenizer.INT
 		}
 	}
 
@@ -716,3 +737,8 @@ func isDigit(ch byte) bool {
 func isHexDigit(ch byte) bool {
 	return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F')
 }
+
+// isOctalDigit reports whether ch is an ASCII octal digit ('0' through '7').
+func isOctalDigit(ch byte) bool {
+	return '0' <= ch && ch <= '7'
+}
diff --git a/pkg/lexer/lexer_test.go b/pkg/lexer/lexer_test.go
index f57fd9fc..5cb60d0b 100644
--- a/pkg/lexer/lexer_test.go
+++ b/pkg/lexer/lexer_test.go
@@ -1078,6 +1078,121 @@ func TestBinaryTrailingUnderscore(t *testing.T) {
 	}
 }
 
+// TestOctalLiterals tests octal number parsing with 0o prefix
+func TestOctalLiterals(t *testing.T) {
+	tests := []struct {
+		input   string
+		literal string
+	}{
+		{"0o123", "0o123"},
+		{"0O123", "0O123"},
+		{"0o0", "0o0"},
+		{"0o7", "0o7"},
+		{"0o777", "0o777"},
+		{"0o1_2_3", "0o1_2_3"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.input, func(t *testing.T) {
+			tokens := tokenize(tt.input)
+			if len(tokens) < 1 {
+				t.Fatal("expected at least 1 token")
+			}
+			if tokens[0].Literal != tt.literal {
+				t.Errorf("literal = %s, want %s", tokens[0].Literal, tt.literal)
+			}
+		})
+	}
+}
+
+// TestE1010InvalidOctalLiteral tests invalid octal literal detection
+func TestE1010InvalidOctalLiteral(t *testing.T) {
+	errs := lexErrors("0o")
+	found := false
+	for _, e := range errs {
+		if e.Code == "E1010" {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Error("expected E1010 for invalid octal literal")
+	}
+}
+
+// TestE1010InvalidOctalDigit tests invalid digit in octal literal
+func TestE1010InvalidOctalDigit(t *testing.T) {
+	errs := lexErrors("0o8")
+	found := false
+	for _, e := range errs {
+		if e.Code == "E1010" {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Error("expected E1010 for invalid octal digit")
+	}
+}
+
+// TestOctalConsecutiveUnderscores tests consecutive underscores in octal
+func TestOctalConsecutiveUnderscores(t *testing.T) {
+	errs := lexErrors("0o12__34")
+	found := false
+	for _, e := range errs {
+		switch e.Code {
+		case "E1011":
+			found = true
+		case "E1013":
+			// The "__" is followed by digits, so nothing in this literal is trailing.
+			t.Error("unexpected E1013 for non-trailing consecutive underscores in octal")
+		}
+	}
+	if !found {
+		t.Error("expected E1011 for consecutive underscores in octal")
+	}
+}
+
+// TestOctalTrailingUnderscore tests trailing underscore in octal
+func TestOctalTrailingUnderscore(t *testing.T) {
+	errs := lexErrors("0o123_")
+	found := false
+	for _, e := range errs {
+		if e.Code == "E1013" {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Error("expected E1013 for trailing underscore in octal")
+	}
+}
+
+// TestLeadingZerosAsDecimal tests that leading zeros are treated as decimal
+func TestLeadingZerosAsDecimal(t *testing.T) {
+	tests := []struct {
+		input   string
+		literal string
+	}{
+		{"0123", "0123"},
+		{"09", "09"},
+		{"007", "007"},
+		{"00123", "00123"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.input, func(t *testing.T) {
+			tokens := tokenize(tt.input)
+			if len(tokens) < 1 {
+				t.Fatal("expected at least 1 token")
+			}
+			if tokens[0].Literal != tt.literal {
+				t.Errorf("literal = %s, want %s", tokens[0].Literal, tt.literal)
+			}
+		})
+	}
+}
+
 // TestScientificNotationWithSign tests scientific notation with explicit signs
 func TestScientificNotationWithSign(t *testing.T) {
 	tests := []struct {
diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go
index 88c862bc..309a486d 100644
--- a/pkg/parser/parser.go
+++ b/pkg/parser/parser.go
@@ -2554,9 +2554,27 @@ func (p *Parser) parseIntegerValue() Expression {
 	// Strip underscores for numeric conversion (they're only for readability)
 	cleanedLiteral := stripUnderscores(p.currentToken.Literal)
 
+	// Determine the base explicitly (don't auto-detect to avoid implicit octal)
+	base := 10
+	parseStr := cleanedLiteral
+	if len(cleanedLiteral) >= 2 && cleanedLiteral[0] == '0' {
+		switch cleanedLiteral[1] {
+		case 'x', 'X':
+			base = 16
+			parseStr = cleanedLiteral[2:] // strip "0x"
+		case 'b', 'B':
+			base = 2
+			parseStr = cleanedLiteral[2:] // strip "0b"
+		case 'o', 'O':
+			base = 8
+			parseStr = cleanedLiteral[2:] // strip "0o"
+		}
+		// Leading zeros without prefix are treated as decimal (not octal)
+	}
+
 	// Use big.Int to parse integers of arbitrary size
 	value := new(big.Int)
-	_, ok := value.SetString(cleanedLiteral, 0)
+	_, ok := value.SetString(parseStr, base)
 	if !ok {
 		msg := fmt.Sprintf("could not parse %q as integer", p.currentToken.Literal)
 		p.errors = append(p.errors, msg)
diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go
index 1581c981..ca5e1ca9 100644
--- a/pkg/parser/parser_test.go
+++ b/pkg/parser/parser_test.go
@@ -198,6 +198,45 @@ func TestIntegerLiterals(t *testing.T) {
 	}
 }
 
+func TestIntegerLiteralBases(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected int64
+	}{
+		// Hex literals
+		{"temp x int = 0xFF", 255},
+		{"temp x int = 0x10", 16},
+		{"temp x int = 0XFF", 255},
+		// Binary literals
+		{"temp x int = 0b1010", 10},
+		{"temp x int = 0B1111", 15},
+		{"temp x int = 0b0", 0},
+		// Octal literals with explicit 0o prefix
+		{"temp x int = 0o123", 83},
+		{"temp x int = 0O777", 511},
+		{"temp x int = 0o0", 0},
+		// Leading zeros should be decimal (not octal)
+		{"temp x int = 0123", 123},
+		{"temp x int = 09", 9},
+		{"temp x int = 007", 7},
+		{"temp x int = 00123", 123},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.input, func(t *testing.T) {
+			program := parseProgram(t, tt.input)
+			if len(program.Statements) != 1 {
+				t.Fatalf("expected 1 statement, got %d", len(program.Statements))
+			}
+			stmt, ok := program.Statements[0].(*VariableDeclaration)
+			if !ok {
+				t.Fatalf("not VariableDeclaration, got %T", program.Statements[0])
+			}
+			testIntegerLiteral(t, stmt.Value, tt.expected)
+		})
+	}
+}
+
 func TestFloatLiterals(t *testing.T) {
 	tests := []struct {
 		input    string