Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions src/Init/Data/String/Slice.lean
Original file line number Diff line number Diff line change
Expand Up @@ -1249,7 +1249,8 @@ def foldr {α : Type u} (f : Char → α → α) (init : α) (s : Slice) : α :=
Checks whether the slice can be interpreted as the decimal representation of a natural number.

A slice can be interpreted as a decimal natural number if it is not empty and all the characters in
it are digits.
it are digits. Underscores ({lit}`_`) are allowed as digit separators for readability, but cannot appear
at the start, at the end, or consecutively.

Use {name (scope := "Init.Data.String.Slice")}`toNat?` or
{name (scope := "Init.Data.String.Slice")}`toNat!` to convert such a slice to a natural number.
Expand All @@ -1260,21 +1261,39 @@ Examples:
* {lean}`"5".toSlice.isNat = true`
* {lean}`"05".toSlice.isNat = true`
* {lean}`"587".toSlice.isNat = true`
* {lean}`"1_000".toSlice.isNat = true`
* {lean}`"100_000_000".toSlice.isNat = true`
* {lean}`"-587".toSlice.isNat = false`
* {lean}`" 5".toSlice.isNat = false`
* {lean}`"2+3".toSlice.isNat = false`
* {lean}`"0xff".toSlice.isNat = false`
* {lean}`"_123".toSlice.isNat = false`
* {lean}`"123_".toSlice.isNat = false`
* {lean}`"12__34".toSlice.isNat = false`
-/
@[inline]
def isNat (s : Slice) : Bool :=
!s.isEmpty && s.all Char.isDigit
if s.isEmpty then
false
else
-- Track: isFirst, lastWasUnderscore, lastCharWasDigit, valid
let result := s.foldl (fun (isFirst, lastWasUnderscore, _lastCharWasDigit, valid) c =>
let isDigit := c.isDigit
let isUnderscore := c = '_'
let newValid := valid && (isDigit || isUnderscore) &&
!(isFirst && isUnderscore) && -- Cannot start with underscore
!(lastWasUnderscore && isUnderscore) -- No consecutive underscores
(false, isUnderscore, isDigit, newValid))
(true, false, false, true)
-- Must be valid and last character must have been a digit (not underscore)
result.2.2.2 && result.2.2.1

/--
Interprets a slice as the decimal representation of a natural number, returning it. Returns
{name}`none` if the slice does not contain a decimal natural number.

A slice can be interpreted as a decimal natural number if it is not empty and all the characters in
it are digits.
it are digits. Underscores ({lit}`_`) are allowed as digit separators and are ignored during parsing.

Use {name}`isNat` to check whether {name}`toNat?` would return {name}`some`.
{name (scope := "Init.Data.String.Slice")}`toNat!` is an alternative that panics instead of
Expand All @@ -1285,14 +1304,16 @@ Examples:
* {lean}`"0".toSlice.toNat? = some 0`
* {lean}`"5".toSlice.toNat? = some 5`
* {lean}`"587".toSlice.toNat? = some 587`
* {lean}`"1_000".toSlice.toNat? = some 1000`
* {lean}`"100_000_000".toSlice.toNat? = some 100000000`
* {lean}`"-587".toSlice.toNat? = none`
* {lean}`" 5".toSlice.toNat? = none`
* {lean}`"2+3".toSlice.toNat? = none`
* {lean}`"0xff".toSlice.toNat? = none`
-/
def toNat? (s : Slice) : Option Nat :=
if s.isNat then
some <| s.foldl (fun n c => n * 10 + (c.toNat - '0'.toNat)) 0
some <| s.foldl (fun n c => if c = '_' then n else n * 10 + (c.toNat - '0'.toNat)) 0
else
none

Expand All @@ -1301,7 +1322,7 @@ Interprets a slice as the decimal representation of a natural number, returning
slice does not contain a decimal natural number.

A slice can be interpreted as a decimal natural number if it is not empty and all the characters in
it are digits.
it are digits. Underscores ({lit}`_`) are allowed as digit separators and are ignored during parsing.

Use {name}`isNat` to check whether {name}`toNat!` would return a value. {name}`toNat?` is a safer
alternative that returns {name}`none` instead of panicking when the string is not a natural number.
Expand All @@ -1310,10 +1331,11 @@ Examples:
* {lean}`"0".toSlice.toNat! = 0`
* {lean}`"5".toSlice.toNat! = 5`
* {lean}`"587".toSlice.toNat! = 587`
* {lean}`"1_000".toSlice.toNat! = 1000`
-/
def toNat! (s : Slice) : Nat :=
if s.isNat then
s.foldl (fun n c => n * 10 + (c.toNat - '0'.toNat)) 0
s.foldl (fun n c => if c = '_' then n else n * 10 + (c.toNat - '0'.toNat)) 0
else
panic! "Nat expected"

Expand Down
22 changes: 18 additions & 4 deletions src/Init/Data/String/Substring.lean
Original file line number Diff line number Diff line change
Expand Up @@ -403,25 +403,39 @@ Examples:
Checks whether the substring can be interpreted as the decimal representation of a natural number.

A substring can be interpreted as a decimal natural number if it is not empty and all the characters
in it are digits.
in it are digits. Underscores ({lit}`_`) are allowed as digit separators for readability, but cannot appear
at the start, at the end, or consecutively.

Use `Substring.toNat?` to convert such a substring to a natural number.
-/
@[inline] def isNat (s : Substring.Raw) : Bool :=
!s.isEmpty && s.all fun c => c.isDigit
if s.isEmpty then
false
else
-- Track: isFirst, lastWasUnderscore, lastCharWasDigit, valid
let result := s.foldl (fun (isFirst, lastWasUnderscore, _lastCharWasDigit, valid) c =>
let isDigit := c.isDigit
let isUnderscore := c = '_'
let newValid := valid && (isDigit || isUnderscore) &&
!(isFirst && isUnderscore) && -- Cannot start with underscore
!(lastWasUnderscore && isUnderscore) -- No consecutive underscores
(false, isUnderscore, isDigit, newValid))
(true, false, false, true)
-- Must be valid and last character must have been a digit (not underscore)
result.2.2.2 && result.2.2.1

/--
Checks whether the substring can be interpreted as the decimal representation of a natural number,
returning the number if it can.

A substring can be interpreted as a decimal natural number if it is not empty and all the characters
in it are digits.
in it are digits. Underscores ({lit}`_`) are allowed as digit separators and are ignored during parsing.

Use `Substring.isNat` to check whether the substring is such a substring.
-/
def toNat? (s : Substring.Raw) : Option Nat :=
if s.isNat then
some <| s.foldl (fun n c => n*10 + (c.toNat - '0'.toNat)) 0
some <| s.foldl (fun n c => if c = '_' then n else n*10 + (c.toNat - '0'.toNat)) 0
else
none

Expand Down
88 changes: 88 additions & 0 deletions tests/lean/run/string_toNat_underscores.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/-!
Tests for `String.toNat?` and `String.toInt?` with underscore support.

This ensures that numeric parsing functions accept the same underscore digit separators
that are allowed in Lean numeric literals, for consistency.

See: https://github.com/leanprover/lean4/issues/11538
-/

-- Basic underscore tests for String.Slice.isNat
#guard "1_000".toSlice.isNat = true
#guard "100_000".toSlice.isNat = true
#guard "1_000_000".toSlice.isNat = true
#guard "100_000_000".toSlice.isNat = true

-- Edge cases for isNat
#guard "_123".toSlice.isNat = false -- Cannot start with underscore
#guard "123_".toSlice.isNat = false -- Cannot end with underscore
#guard "12__34".toSlice.isNat = false -- Cannot have consecutive underscores
#guard "1_2_3".toSlice.isNat = true -- Single underscores are fine

-- toNat? basic tests
#guard "1_000".toSlice.toNat? = some 1000
#guard "100_000".toSlice.toNat? = some 100000
#guard "1_000_000".toSlice.toNat? = some 1000000
#guard "100_000_000".toSlice.toNat? = some 100000000

-- More complex patterns
#guard "1_2_3_4_5".toSlice.toNat? = some 12345
#guard "9_99".toSlice.toNat? = some 999
#guard "5_0_0_0".toSlice.toNat? = some 5000

-- Edge cases that should fail
#guard "_123".toSlice.toNat? = none
#guard "123_".toSlice.toNat? = none
#guard "12__34".toSlice.toNat? = none
#guard "12_3_".toSlice.toNat? = none
#guard "_".toSlice.toNat? = none

-- Verify numeric values are correct
#guard "1_234".toSlice.toNat? = "1234".toSlice.toNat?
#guard "987_654_321".toSlice.toNat? = "987654321".toSlice.toNat?

-- Tests for String.toNat? (not just Slice)
#guard "1_000".toNat? = some 1000
#guard "100_000".toNat? = some 100000
#guard "1_000_000".toNat? = some 1000000
#guard "_123".toNat? = none
#guard "123_".toNat? = none

-- Tests for String.Slice.isInt
#guard "-1_000".toSlice.isInt = true
#guard "-100_000".toSlice.isInt = true
#guard "1_000".toSlice.isInt = true -- Positive numbers are also ints

-- Edge cases for isInt
#guard "-_123".toSlice.isInt = false -- Cannot have underscore right after minus
#guard "-123_".toSlice.isInt = false -- Cannot end with underscore
#guard "-12__34".toSlice.isInt = false -- Cannot have consecutive underscores

-- toInt? basic tests
#guard "-1_000".toSlice.toInt? = some (-1000)
#guard "-100_000".toSlice.toInt? = some (-100000)
#guard "-1_000_000".toSlice.toInt? = some (-1000000)
#guard "1_000".toSlice.toInt? = some 1000
#guard "100_000".toSlice.toInt? = some 100000

-- Edge cases for toInt?
#guard "-_123".toSlice.toInt? = none
#guard "-123_".toSlice.toInt? = none
#guard "-12__34".toSlice.toInt? = none

-- Verify numeric values are correct
#guard "-1_234".toSlice.toInt? = "-1234".toSlice.toInt?
#guard "987_654_321".toSlice.toInt? = "987654321".toSlice.toInt?

-- Tests for String.toInt? (not just Slice)
#guard "-1_000".toInt? = some (-1000)
#guard "1_000".toInt? = some 1000
#guard "-_123".toInt? = none
#guard "-123_".toInt? = none

-- Compatibility with existing behavior (no underscores)
#guard "0".toNat? = some 0
#guard "123".toNat? = some 123
#guard "".toNat? = none
#guard "-456".toInt? = some (-456)
#guard "789".toInt? = some 789
Loading