Skip to content

Commit dda6885

Browse files
authored
refactor: String.foldl and String.isNat go through String.Slice (#11289)
This PR redefines `String.foldl`, `String.isNat` to use their `String.Slice` counterparts.
1 parent cce4873 commit dda6885

File tree

13 files changed

+307
-153
lines changed

13 files changed

+307
-153
lines changed

src/Init/Data/String/Basic.lean

Lines changed: 181 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,6 +1000,28 @@ theorem Slice.Pos.offset_le_offset_endExclusive {s : Slice} {pos : s.Pos} :
10001000
pos.offset ≤ s.endExclusive.offset :=
10011001
Pos.Raw.le_trans offset_le_offset_str offset_str_le_offset_endExclusive
10021002

1003+
@[simp]
1004+
theorem Slice.Pos.startInclusive_le_str {s : Slice} {pos : s.Pos} :
1005+
s.startInclusive ≤ pos.str := by
1006+
simp [ValidPos.le_iff, Pos.Raw.le_iff]
1007+
1008+
/--
1009+
Given a valid position on `s.str` which is within the bounds of the slice `s`, obtains the
1010+
corresponding valid position on `s`.
1011+
-/
1012+
@[inline]
1013+
def Slice.Pos.ofStr {s : Slice} (pos : s.str.ValidPos) (h₁ : s.startInclusive ≤ pos)
1014+
(h₂ : pos ≤ s.endExclusive) : s.Pos where
1015+
offset := pos.offset.unoffsetBy s.startInclusive.offset
1016+
isValidForSlice := by
1017+
refine ⟨?_, Pos.Raw.offsetBy_unoffsetBy_of_le h₁ |>.symm ▸ pos.isValid⟩
1018+
simp [ValidPos.le_iff, Pos.Raw.le_iff, Slice.utf8ByteSize_eq] at *
1019+
omega
1020+
1021+
@[simp]
1022+
theorem Slice.Pos.offset_ofStr {s : Slice} {pos : s.str.ValidPos} {h₁ h₂} :
1023+
(ofStr pos h₁ h₂).offset = pos.offset.unoffsetBy s.startInclusive.offset := (rfl)
1024+
10031025
/-- Given a slice and a valid position within the slice, obtain a new slice on the same underlying
10041026
string by replacing the start of the slice with the given position. -/
10051027
@[inline, expose] -- for the defeq `(s.sliceFrom pos).str = s.str`
@@ -1644,11 +1666,15 @@ def Slice.pos! (s : Slice) (off : String.Pos.Raw) : s.Pos :=
16441666
position is not the past-the-end position, which guarantees that such a position exists. -/
16451667
@[expose, extern "lean_string_utf8_next_fast"]
16461668
def ValidPos.next {s : @& String} (pos : @& s.ValidPos) (h : pos ≠ s.endValidPos) : s.ValidPos :=
1647-
((inline (Slice.Pos.next pos.toSlice (ne_of_apply_ne Slice.Pos.ofSlice (by simpa)))).ofSlice)
1669+
(Slice.Pos.next pos.toSlice (ne_of_apply_ne Slice.Pos.ofSlice (by simpa))).ofSlice
1670+
1671+
@[simp]
1672+
theorem Slice.Pos.str_inj {s : Slice} (p₁ p₂ : s.Pos) : p₁.str = p₂.str ↔ p₁ = p₂ := by
1673+
simp [Slice.Pos.ext_iff, ValidPos.ext_iff, Pos.Raw.ext_iff]
16481674

16491675
@[expose, extern "lean_string_utf8_next_fast"]
16501676
def Pos.next {s : @& String} (pos : @& s.ValidPos) (h : pos ≠ s.endValidPos) : s.ValidPos :=
1651-
((inline (Slice.Pos.next pos.toSlice (ne_of_apply_ne Slice.Pos.ofSlice (by simpa)))).ofSlice)
1677+
(Slice.Pos.next pos.toSlice (ne_of_apply_ne Slice.Pos.ofSlice (by simpa))).ofSlice
16521678

16531679
/-- Advances a valid position on a string to the next valid position, or returns `none` if the
16541680
given position is the past-the-end position. -/
@@ -1695,6 +1721,9 @@ def pos? (s : String) (off : Pos.Raw) : Option s.ValidPos :=
16951721
def pos! (s : String) (off : Pos.Raw) : s.ValidPos :=
16961722
(s.toSlice.pos! off).ofSlice
16971723

1724+
@[simp]
1725+
theorem offset_pos {s : String} {off : Pos.Raw} {h} : (s.pos off h).offset = off := rfl
1726+
16981727
/-- Constructs a valid position on `t` from a valid position on `s` and a proof that `s = t`. -/
16991728
@[inline]
17001729
def Slice.Pos.cast {s t : Slice} (pos : s.Pos) (h : s = t) : t.Pos where
@@ -1807,51 +1836,6 @@ theorem Slice.Pos.prev_lt {s : Slice} {p : s.Pos} {h} : p.prev h < p :=
18071836
theorem ValidPos.prev_lt {s : String} {p : s.ValidPos} {h} : p.prev h < p := by
18081837
simp [← toSlice_lt, toSlice_prev]
18091838

1810-
/--
1811-
Advances the position `p` `n` times.
1812-
1813-
If this would move `p` past the end of `s`, the result is `s.endPos`.
1814-
-/
1815-
def Slice.Pos.nextn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
1816-
match n with
1817-
| 0 => p
1818-
| n + 1 =>
1819-
if h : p ≠ s.endPos then
1820-
nextn (p.next h) n
1821-
else
1822-
p
1823-
1824-
/--
1825-
Iterates `p.prev` `n` times.
1826-
1827-
If this would move `p` past the start of `s`, the result is `s.endPos`.
1828-
-/
1829-
def Slice.Pos.prevn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
1830-
match n with
1831-
| 0 => p
1832-
| n + 1 =>
1833-
if h : p ≠ s.startPos then
1834-
prevn (p.prev h) n
1835-
else
1836-
p
1837-
1838-
/--
1839-
Advances the position `p` `n` times.
1840-
1841-
If this would move `p` past the end of `s`, the result is `s.endValidPos`.
1842-
-/
1843-
@[inline]
1844-
def ValidPos.nextn {s : String} (p : s.ValidPos) (n : Nat) : s.ValidPos :=
1845-
(p.toSlice.nextn n).ofSlice
1846-
1847-
/--
1848-
Iterates `p.prev` `n` times.
1849-
1850-
If this would move `p` past the start of `s`, the result is `s.startValidPos`.
1851-
-/
1852-
@[inline]
1853-
def ValidPos.prevn {s : String} (p : s.ValidPos) (n : Nat) : s.ValidPos :=
1854-
(p.toSlice.prevn n).ofSlice
18551839

18561840
@[expose]
18571841
def Pos.Raw.utf8GetAux : List Char → Pos.Raw → Pos.Raw → Char
@@ -1979,7 +1963,7 @@ theorem ValidPos.byteIdx_lt_utf8ByteSize {s : String} (p : s.ValidPos) (h : p
19791963
omega
19801964

19811965
@[simp]
1982-
theorem ValidPos.lt_next {s : String} (p : s.ValidPos) {h} : p < p.next h := by
1966+
theorem ValidPos.lt_next {s : String} {p : s.ValidPos} {h} : p < p.next h := by
19831967
simp [← ValidPos.toSlice_lt, toSlice_next]
19841968

19851969
theorem ValidPos.ne_startPos_of_lt {s : String} {p q : s.ValidPos} :
@@ -1997,6 +1981,109 @@ theorem ValidPos.str_toSlice {s : String} {p : s.ValidPos} : p.toSlice.str = p :
19971981
ext
19981982
simp
19991983

1984+
theorem Slice.Pos.str_le_endExclusive {s : Slice} (p : s.Pos) :
1985+
p.str ≤ s.endExclusive := by
1986+
have := p.isValidForSlice.le_utf8ByteSize
1987+
have := s.startInclusive_le_endExclusive
1988+
simp [ValidPos.le_iff, Pos.Raw.le_iff, Slice.utf8ByteSize_eq] at *
1989+
omega
1990+
1991+
theorem ValidPos.lt_of_le_of_ne {s : String} {p q : s.ValidPos} :
1992+
p ≤ q → p ≠ q → p < q := by
1993+
simp [ValidPos.le_iff, ValidPos.lt_iff, ValidPos.ext_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff,
1994+
Pos.Raw.ext_iff]
1995+
omega
1996+
1997+
@[simp]
1998+
theorem Slice.Pos.str_endPos {s : Slice} : s.endPos.str = s.endExclusive := by
1999+
simp [ValidPos.ext_iff]
2000+
2001+
theorem Slice.Pos.str_lt_endExclusive {s : Slice} (p : s.Pos) (h : p ≠ s.endPos) :
2002+
p.str < s.endExclusive :=
2003+
ValidPos.lt_of_le_of_ne p.str_le_endExclusive (by rwa [← str_endPos, ne_eq, str_inj])
2004+
2005+
theorem ValidPos.ne_of_lt {s : String} {p q : s.ValidPos} : p < q → p ≠ q := by
2006+
simpa [ValidPos.lt_iff, ValidPos.ext_iff] using Pos.Raw.ne_of_lt
2007+
2008+
theorem ValidPos.lt_of_lt_of_le {s : String} {p q r : s.ValidPos} : p < q → q ≤ r → p < r := by
2009+
simpa [ValidPos.lt_iff, ValidPos.le_iff] using Pos.Raw.lt_of_lt_of_le
2010+
2011+
theorem ValidPos.le_endValidPos {s : String} (p : s.ValidPos) : p ≤ s.endValidPos := by
2012+
simpa [ValidPos.le_iff] using p.isValid.le_rawEndPos
2013+
2014+
theorem Slice.Pos.str_ne_endValidPos {s : Slice} (p : s.Pos) (h : p ≠ s.endPos) :
2015+
p.str ≠ s.str.endValidPos :=
2016+
ValidPos.ne_of_lt (ValidPos.lt_of_lt_of_le (p.str_lt_endExclusive h) (ValidPos.le_endValidPos _))
2017+
2018+
theorem ValidPos.le_trans {s : String} {p q r : s.ValidPos} : p ≤ q → q ≤ r → p ≤ r := by
2019+
simpa [ValidPos.le_iff] using Pos.Raw.le_trans
2020+
2021+
theorem ValidPos.le_of_lt {s : String} {p q : s.ValidPos} : p < q → p ≤ q := by
2022+
simpa [ValidPos.le_iff, ValidPos.lt_iff] using Pos.Raw.le_of_lt
2023+
2024+
theorem Slice.Pos.le_of_not_lt {s : Slice} {p q : s.Pos} : ¬q < p → p ≤ q := by
2025+
simp [Slice.Pos.le_iff, Slice.Pos.lt_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff]
2026+
2027+
theorem Slice.Pos.ne_endPos_of_lt {s : Slice} {p q : s.Pos} : p < q → p ≠ s.endPos := by
2028+
have := q.isValidForSlice.le_utf8ByteSize
2029+
simp [lt_iff, Pos.ext_iff, Pos.Raw.lt_iff, Pos.Raw.ext_iff]
2030+
omega
2031+
2032+
theorem Slice.Pos.next_le_of_lt {s : Slice} {p q : s.Pos} {h} : p < q → p.next h ≤ q := by
2033+
-- Things like this will become a lot simpler once we have the `Splits` machinery developed,
2034+
-- but this is `String.Basic`, so we have to suffer a little.
2035+
refine fun hpq => le_of_not_lt (fun hq => ?_)
2036+
have := q.isUTF8FirstByte_byte (h := ne_endPos_of_lt hq)
2037+
rw [byte, getUTF8Byte, String.getUTF8Byte] at this
2038+
simp only [Pos.Raw.byteIdx_offsetBy] at this
2039+
have h₁ : q.offset.byteIdx = p.offset.byteIdx + (q.offset.byteIdx - p.offset.byteIdx) := by
2040+
simp [lt_iff, Pos.Raw.lt_iff] at hpq
2041+
omega
2042+
have h₂ : q.offset.byteIdx - p.offset.byteIdx < (p.get h).utf8Size := by
2043+
simp [lt_iff, Pos.Raw.lt_iff] at hq
2044+
omega
2045+
conv at this => congr; arg 2; rw [h₁, ← Nat.add_assoc]
2046+
rw [← ByteArray.getElem_extract (start := s.startInclusive.offset.byteIdx + p.offset.byteIdx)
2047+
(stop := s.startInclusive.offset.byteIdx + p.offset.byteIdx + (p.get h).utf8Size)] at this
2048+
· simp only [← utf8Encode_get_eq_extract, List.utf8Encode_singleton] at this
2049+
have h₃ := List.getElem_toByteArray (l := utf8EncodeChar (p.get h))
2050+
(i := q.offset.byteIdx - p.offset.byteIdx) (h := by simpa)
2051+
rw [h₃, UInt8.isUTF8FirstByte_getElem_utf8EncodeChar] at this
2052+
simp only [lt_iff, Pos.Raw.lt_iff] at hpq
2053+
omega
2054+
· simp only [ByteArray.size_extract, size_bytes]
2055+
rw [Nat.min_eq_left]
2056+
· omega
2057+
· have := (p.next h).str.isValid.le_utf8ByteSize
2058+
simpa [Nat.add_assoc] using this
2059+
2060+
theorem Slice.Pos.ofSlice_le_iff {s : String} {p : s.toSlice.Pos} {q : s.ValidPos} :
2061+
p.ofSlice ≤ q ↔ p ≤ q.toSlice := Iff.rfl
2062+
2063+
@[simp]
2064+
theorem ValidPos.toSlice_lt_toSlice_iff {s : String} {p q : s.ValidPos} :
2065+
p.toSlice < q.toSlice ↔ p < q := Iff.rfl
2066+
2067+
theorem ValidPos.next_le_of_lt {s : String} {p q : s.ValidPos} {h} : p < q → p.next h ≤ q := by
2068+
rw [next, Slice.Pos.ofSlice_le_iff, ← ValidPos.toSlice_lt_toSlice_iff]
2069+
exact Slice.Pos.next_le_of_lt
2070+
2071+
theorem Slice.Pos.get_eq_get_str {s : Slice} {p : s.Pos} {h} :
2072+
p.get h = p.str.get (str_ne_endValidPos _ h) := by
2073+
simp [ValidPos.get, Slice.Pos.get]
2074+
2075+
@[inline]
2076+
def Slice.Pos.nextFast {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : s.Pos :=
2077+
ofStr (pos.str.next (str_ne_endValidPos _ h))
2078+
(ValidPos.le_trans Slice.Pos.startInclusive_le_str (ValidPos.le_of_lt ValidPos.lt_next))
2079+
(ValidPos.next_le_of_lt (Slice.Pos.str_lt_endExclusive _ h))
2080+
2081+
@[csimp]
2082+
theorem Slice.Pos.next_eq_nextFast : @Slice.Pos.next = @Slice.Pos.nextFast := by
2083+
funext s pos h
2084+
simp [Slice.Pos.ext_iff, Slice.Pos.nextFast, Pos.Raw.ext_iff, Slice.Pos.get_eq_get_str]
2085+
omega
2086+
20002087
/-- The slice from the beginning of `s` up to `p` (exclusive). -/
20012088
@[inline, expose]
20022089
def sliceTo (s : String) (p : s.ValidPos) : Slice :=
@@ -2209,6 +2296,52 @@ def ValidPos.toReplaceEnd {s : String} (p₀ : s.ValidPos) (pos : s.ValidPos) (h
22092296
theorem ValidPos.offset_sliceTo {s : String} {p₀ : s.ValidPos} {pos : s.ValidPos} {h : pos ≤ p₀} :
22102297
(sliceTo p₀ pos h).offset = pos.offset := (rfl)
22112298

2299+
/--
2300+
Advances the position `p` `n` times.
2301+
2302+
If this would move `p` past the end of `s`, the result is `s.endPos`.
2303+
-/
2304+
def Slice.Pos.nextn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
2305+
match n with
2306+
| 0 => p
2307+
| n + 1 =>
2308+
if h : p ≠ s.endPos then
2309+
nextn (p.next h) n
2310+
else
2311+
p
2312+
2313+
/--
2314+
Iterates `p.prev` `n` times.
2315+
2316+
If this would move `p` past the start of `s`, the result is `s.endPos`.
2317+
-/
2318+
def Slice.Pos.prevn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
2319+
match n with
2320+
| 0 => p
2321+
| n + 1 =>
2322+
if h : p ≠ s.startPos then
2323+
prevn (p.prev h) n
2324+
else
2325+
p
2326+
2327+
/--
2328+
Advances the position `p` `n` times.
2329+
2330+
If this would move `p` past the end of `s`, the result is `s.endValidPos`.
2331+
-/
2332+
@[inline]
2333+
def ValidPos.nextn {s : String} (p : s.ValidPos) (n : Nat) : s.ValidPos :=
2334+
(p.toSlice.nextn n).ofSlice
2335+
2336+
/--
2337+
Iterates `p.prev` `n` times.
2338+
2339+
If this would move `p` past the start of `s`, the result is `s.startValidPos`.
2340+
-/
2341+
@[inline]
2342+
def ValidPos.prevn {s : String} (p : s.ValidPos) (n : Nat) : s.ValidPos :=
2343+
(p.toSlice.prevn n).ofSlice
2344+
22122345
theorem Slice.Pos.le_nextn {s : Slice} {p : s.Pos} {n : Nat} : p ≤ p.nextn n := by
22132346
fun_induction nextn with
22142347
| case1 => simp [Slice.Pos.le_iff]
@@ -2624,29 +2757,6 @@ def offsetOfPos (s : String) (pos : Pos.Raw) : Nat :=
26242757
def Internal.offsetOfPosImpl (s : String) (pos : Pos.Raw) : Nat :=
26252758
String.Pos.Raw.offsetOfPos s pos
26262759

2627-
@[specialize] def foldlAux {α : Type u} (f : α → Char → α) (s : String) (stopPos : Pos.Raw) (i : Pos.Raw) (a : α) : α :=
2628-
if h : i < stopPos then
2629-
have := Nat.sub_lt_sub_left h (Pos.Raw.lt_next s i)
2630-
foldlAux f s stopPos (i.next s) (f a (i.get s))
2631-
else a
2632-
termination_by stopPos.1 - i.1
2633-
2634-
/--
2635-
Folds a function over a string from the left, accumulating a value starting with `init`. The
2636-
accumulated value is combined with each character in order, using `f`.
2637-
2638-
Examples:
2639-
* `"coffee tea water".foldl (fun n c => if c.isWhitespace then n + 1 else n) 0 = 2`
2640-
* `"coffee tea and water".foldl (fun n c => if c.isWhitespace then n + 1 else n) 0 = 3`
2641-
* `"coffee tea water".foldl (·.push ·) "" = "coffee tea water"`
2642-
-/
2643-
@[inline] def foldl {α : Type u} (f : α → Char → α) (init : α) (s : String) : α :=
2644-
foldlAux f s s.rawEndPos 0 init
2645-
2646-
@[export lean_string_foldl]
2647-
def Internal.foldlImpl (f : String → Char → String) (init : String) (s : String) : String :=
2648-
String.foldl f init s
2649-
26502760
@[specialize] def foldrAux {α : Type u} (f : Char → α → α) (a : α) (s : String) (i begPos : Pos.Raw) : α :=
26512761
if h : begPos < i then
26522762
have := Pos.Raw.prev_lt_of_pos s i <| mt (congrArg String.Pos.Raw.byteIdx) <|
@@ -2730,55 +2840,6 @@ theorem Pos.Raw.utf8SetAux_of_gt (c' : Char) : ∀ (cs : List Char) {i p : Pos.R
27302840
rw [utf8SetAux, if_neg (mt (congrArg (·.1)) (Ne.symm <| Nat.ne_of_lt h)), utf8SetAux_of_gt c' cs]
27312841
exact Nat.lt_of_lt_of_le h (Nat.le_add_right ..)
27322842

2733-
2734-
/--
2735-
Checks whether the string can be interpreted as the decimal representation of a natural number.
2736-
2737-
A string can be interpreted as a decimal natural number if it is not empty and all the characters in
2738-
it are digits.
2739-
2740-
Use `String.toNat?` or `String.toNat!` to convert such a string to a natural number.
2741-
2742-
Examples:
2743-
* `"".isNat = false`
2744-
* `"0".isNat = true`
2745-
* `"5".isNat = true`
2746-
* `"05".isNat = true`
2747-
* `"587".isNat = true`
2748-
* `"-587".isNat = false`
2749-
* `" 5".isNat = false`
2750-
* `"2+3".isNat = false`
2751-
* `"0xff".isNat = false`
2752-
-/
2753-
@[inline] def isNat (s : String) : Bool :=
2754-
!s.isEmpty && s.all (·.isDigit)
2755-
2756-
/--
2757-
Interprets a string as the decimal representation of a natural number, returning it. Returns `none`
2758-
if the string does not contain a decimal natural number.
2759-
2760-
A string can be interpreted as a decimal natural number if it is not empty and all the characters in
2761-
it are digits.
2762-
2763-
Use `String.isNat` to check whether `String.toNat?` would return `some`. `String.toNat!` is an
2764-
alternative that panics instead of returning `none` when the string is not a natural number.
2765-
2766-
Examples:
2767-
* `"".toNat? = none`
2768-
* `"0".toNat? = some 0`
2769-
* `"5".toNat? = some 5`
2770-
* `"587".toNat? = some 587`
2771-
* `"-587".toNat? = none`
2772-
* `" 5".toNat? = none`
2773-
* `"2+3".toNat? = none`
2774-
* `"0xff".toNat? = none`
2775-
-/
2776-
def toNat? (s : String) : Option Nat :=
2777-
if s.isNat then
2778-
some <| s.foldl (fun n c => n*10 + (c.toNat - '0'.toNat)) 0
2779-
else
2780-
none
2781-
27822843
/--
27832844
Checks whether substrings of two strings are equal. Substrings are indicated by their starting
27842845
positions and a size in _UTF-8 bytes_. Returns `false` if the indicated substring does not exist in
@@ -2903,5 +2964,3 @@ namespace Char
29032964
simp [toString_eq_singleton]
29042965

29052966
end Char
2906-
2907-
open String

0 commit comments

Comments
 (0)