Skip to content

Commit 5957364

Browse files
authored
chore: more minor String improvements (#10930)
This PR moves some more material out of `Init.Data.String.Basic` and fixes the incorrect name `String.Pos.Raw.IsValidForSlice.le_utf8ByteSize`.
1 parent ba7798b commit 5957364

File tree

6 files changed

+91
-80
lines changed

6 files changed

+91
-80
lines changed

src/Init/Data/String/Basic.lean

Lines changed: 7 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -893,7 +893,7 @@ theorem Slice.Pos.offset_str {s : Slice} {pos : s.Pos} :
893893
@[simp]
894894
theorem Slice.Pos.offset_str_le_offset_endExclusive {s : Slice} {pos : s.Pos} :
895895
pos.str.offset ≤ s.endExclusive.offset := by
896-
have := pos.isValidForSlice.le_utf8ByteSize
896+
have := pos.isValidForSlice.le_rawEndPos
897897
have := s.startInclusive_le_endExclusive
898898
simp only [Pos.Raw.le_iff, byteIdx_rawEndPos, utf8ByteSize_eq, offset_str,
899899
Pos.Raw.byteIdx_offsetBy, ValidPos.le_iff] at *
@@ -1012,7 +1012,7 @@ theorem Slice.utf8ByteSize_replaceStartEnd {s : Slice} {newStart newEnd : s.Pos}
10121012
theorem Pos.Raw.isValidForSlice_replaceStart {s : Slice} {p : s.Pos} {off : Pos.Raw} :
10131013
off.IsValidForSlice (s.replaceStart p) ↔ (off.offsetBy p.offset).IsValidForSlice s := by
10141014
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩, fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩⟩
1015-
· have := p.isValidForSlice.le_utf8ByteSize
1015+
· have := p.isValidForSlice.le_rawEndPos
10161016
simp_all [le_iff]
10171017
omega
10181018
· simpa [Pos.Raw.offsetBy_assoc] using h₂
@@ -1025,7 +1025,7 @@ theorem Pos.Raw.isValidForSlice_replaceEnd {s : Slice} {p : s.Pos} {off : Pos.Ra
10251025
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_, ?_⟩, fun ⟨h₁, ⟨h₂, h₃⟩⟩ => ⟨?_, ?_⟩⟩
10261026
· simpa using h₁
10271027
· simp only [Slice.rawEndPos_replaceEnd] at h₁
1028-
exact Pos.Raw.le_trans h₁ p.isValidForSlice.le_utf8ByteSize
1028+
exact Pos.Raw.le_trans h₁ p.isValidForSlice.le_rawEndPos
10291029
· simpa using h₂
10301030
· simpa using h₁
10311031
· simpa using h₃
@@ -1297,7 +1297,7 @@ theorem Slice.Pos.ofReplaceStart_startPos {s : Slice} {pos : s.Pos} :
12971297
@[simp]
12981298
theorem Slice.Pos.ofReplaceStart_endPos {s : Slice} {pos : s.Pos} :
12991299
ofReplaceStart (s.replaceStart pos).endPos = s.endPos := by
1300-
have := pos.isValidForSlice.le_utf8ByteSize
1300+
have := pos.isValidForSlice.le_rawEndPos
13011301
simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff]
13021302

13031303
theorem Slice.Pos.ofReplaceStart_inj {s : Slice} {p₀ : s.Pos} {pos pos' : (s.replaceStart p₀).Pos} :
@@ -1388,7 +1388,7 @@ theorem Slice.Pos.lt_next {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
13881388
@[inline, expose]
13891389
def Slice.Pos.prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : String.Pos.Raw :=
13901390
go (pos.offset.byteIdx - 1) (by
1391-
have := pos.isValidForSlice.le_utf8ByteSize
1391+
have := pos.isValidForSlice.le_rawEndPos
13921392
simp [Pos.Raw.le_iff, Pos.ext_iff] at ⊢ this h
13931393
omega)
13941394
where
@@ -1603,7 +1603,7 @@ theorem Slice.Pos.prevAux_lt_self {s : Slice} {p : s.Pos} {h} : p.prevAux h < p.
16031603
omega
16041604

16051605
theorem Slice.Pos.prevAux_lt_rawEndPos {s : Slice} {p : s.Pos} {h} : p.prevAux h < s.rawEndPos :=
1606-
Pos.Raw.lt_of_lt_of_le prevAux_lt_self p.isValidForSlice.le_utf8ByteSize
1606+
Pos.Raw.lt_of_lt_of_le prevAux_lt_self p.isValidForSlice.le_rawEndPos
16071607

16081608
@[simp]
16091609
theorem Slice.Pos.prev_ne_endPos {s : Slice} {p : s.Pos} {h} : p.prev h ≠ s.endPos := by
@@ -2256,73 +2256,6 @@ Examples:
22562256
@[inline] def splitOn (s : String) (sep : String := " ") : List String :=
22572257
if sep == "" then [s] else splitOnAux s sep 0 0 0 []
22582258

2259-
/--
2260-
Adds multiple repetitions of a character to the end of a string.
2261-
2262-
Returns `s`, with `n` repetitions of `c` at the end. Internally, the implementation repeatedly calls
2263-
`String.push`, so the string is modified in-place if there is a unique reference to it.
2264-
2265-
Examples:
2266-
* `"indeed".pushn '!' 2 = "indeed!!"`
2267-
* `"indeed".pushn '!' 0 = "indeed"`
2268-
* `"".pushn ' ' 4 = " "`
2269-
-/
2270-
@[inline] def pushn (s : String) (c : Char) (n : Nat) : String :=
2271-
n.repeat (fun s => s.push c) s
2272-
2273-
@[export lean_string_pushn]
2274-
def Internal.pushnImpl (s : String) (c : Char) (n : Nat) : String :=
2275-
String.pushn s c n
2276-
2277-
/--
2278-
Checks whether a string is empty.
2279-
2280-
Empty strings are equal to `""` and have length and end position `0`.
2281-
2282-
Examples:
2283-
* `"".isEmpty = true`
2284-
* `"empty".isEmpty = false`
2285-
* `" ".isEmpty = false`
2286-
-/
2287-
@[inline] def isEmpty (s : String) : Bool :=
2288-
s.rawEndPos == 0
2289-
2290-
@[export lean_string_isempty]
2291-
def Internal.isEmptyImpl (s : String) : Bool :=
2292-
String.isEmpty s
2293-
2294-
/--
2295-
Appends all the strings in a list of strings, in order.
2296-
2297-
Use `String.intercalate` to place a separator string between the strings in a list.
2298-
2299-
Examples:
2300-
* `String.join ["gr", "ee", "n"] = "green"`
2301-
* `String.join ["b", "", "l", "", "ue"] = "blue"`
2302-
* `String.join [] = ""`
2303-
-/
2304-
@[inline] def join (l : List String) : String :=
2305-
l.foldl (fun r s => r ++ s) ""
2306-
2307-
/--
2308-
Appends the strings in a list of strings, placing the separator `s` between each pair.
2309-
2310-
Examples:
2311-
* `", ".intercalate ["red", "green", "blue"] = "red, green, blue"`
2312-
* `" and ".intercalate ["tea", "coffee"] = "tea and coffee"`
2313-
* `" | ".intercalate ["M", "", "N"] = "M | | N"`
2314-
-/
2315-
def intercalate (s : String) : List String → String
2316-
| [] => ""
2317-
| a :: as => go a s as
2318-
where go (acc : String) (s : String) : List String → String
2319-
| a :: as => go (acc ++ s ++ a) s as
2320-
| [] => acc
2321-
2322-
@[export lean_string_intercalate]
2323-
def Internal.intercalateImpl (s : String) : List String → String :=
2324-
String.intercalate s
2325-
23262259

23272260
def offsetOfPosAux (s : String) (pos : Pos.Raw) (i : Pos.Raw) (offset : Nat) : Nat :=
23282261
if i >= pos then offset
@@ -2592,7 +2525,7 @@ theorem length_singleton {c : Char} : (String.singleton c).length = 1 := by
25922525
simp [← length_data]
25932526

25942527
@[simp] theorem length_pushn (c : Char) (n : Nat) : (pushn s c n).length = s.length + n := by
2595-
unfold pushn; induction n <;> simp [Nat.repeat, Nat.add_assoc, *]
2528+
rw [pushn_eq_repeat_push]; induction n <;> simp [Nat.repeat, Nat.add_assoc, *]
25962529

25972530
@[simp] theorem length_append (s t : String) : (s ++ t).length = s.length + t.length := by
25982531
simp [← length_data]

src/Init/Data/String/Defs.lean

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,76 @@ theorem rawEndPos_eq_zero_iff {b : String} : b.rawEndPos = 0 ↔ b = "" := by
185185
theorem endPos_eq_zero_iff {b : String} : b.rawEndPos = 0 ↔ b = "" :=
186186
rawEndPos_eq_zero_iff
187187

188+
/--
189+
Adds multiple repetitions of a character to the end of a string.
190+
191+
Returns `s`, with `n` repetitions of `c` at the end. Internally, the implementation repeatedly calls
192+
`String.push`, so the string is modified in-place if there is a unique reference to it.
193+
194+
Examples:
195+
* `"indeed".pushn '!' 2 = "indeed!!"`
196+
* `"indeed".pushn '!' 0 = "indeed"`
197+
* `"".pushn ' ' 4 = " "`
198+
-/
199+
@[inline] def pushn (s : String) (c : Char) (n : Nat) : String :=
200+
n.repeat (fun s => s.push c) s
201+
202+
theorem pushn_eq_repeat_push {s : String} {c : Char} {n : Nat} :
203+
s.pushn c n = n.repeat (fun s => s.push c) s := (rfl)
204+
205+
@[export lean_string_pushn]
206+
def Internal.pushnImpl (s : String) (c : Char) (n : Nat) : String :=
207+
String.pushn s c n
208+
209+
/--
210+
Checks whether a string is empty.
211+
212+
Empty strings are equal to `""` and have length and end position `0`.
213+
214+
Examples:
215+
* `"".isEmpty = true`
216+
* `"empty".isEmpty = false`
217+
* `" ".isEmpty = false`
218+
-/
219+
@[inline] def isEmpty (s : String) : Bool :=
220+
s.utf8ByteSize == 0
221+
222+
@[export lean_string_isempty]
223+
def Internal.isEmptyImpl (s : String) : Bool :=
224+
String.isEmpty s
225+
226+
/--
227+
Appends all the strings in a list of strings, in order.
228+
229+
Use `String.intercalate` to place a separator string between the strings in a list.
230+
231+
Examples:
232+
* `String.join ["gr", "ee", "n"] = "green"`
233+
* `String.join ["b", "", "l", "", "ue"] = "blue"`
234+
* `String.join [] = ""`
235+
-/
236+
@[inline] def join (l : List String) : String :=
237+
l.foldl (fun r s => r ++ s) ""
238+
239+
/--
240+
Appends the strings in a list of strings, placing the separator `s` between each pair.
241+
242+
Examples:
243+
* `", ".intercalate ["red", "green", "blue"] = "red, green, blue"`
244+
* `" and ".intercalate ["tea", "coffee"] = "tea and coffee"`
245+
* `" | ".intercalate ["M", "", "N"] = "M | | N"`
246+
-/
247+
def intercalate (s : String) : List String → String
248+
| [] => ""
249+
| a :: as => go a s as
250+
where go (acc : String) (s : String) : List String → String
251+
| a :: as => go (acc ++ s ++ a) s as
252+
| [] => acc
253+
254+
@[export lean_string_intercalate]
255+
def Internal.intercalateImpl (s : String) : List String → String :=
256+
String.intercalate s
257+
188258
/--
189259
Predicate for validity of positions inside a `String`.
190260
@@ -219,6 +289,10 @@ structure Pos.Raw.IsValid (s : String) (off : String.Pos.Raw) : Prop where priva
219289
le_rawEndPos : off ≤ s.rawEndPos
220290
isValidUTF8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUTF8
221291

292+
theorem Pos.Raw.IsValid.le_utf8ByteSize {s : String} {off : String.Pos.Raw} (h : off.IsValid s) :
293+
off.byteIdx ≤ s.utf8ByteSize := by
294+
simpa [Pos.Raw.le_iff] using h.le_rawEndPos
295+
222296
theorem Pos.Raw.isValid_iff_isValidUTF8_extract_zero {s : String} {p : Pos.Raw} :
223297
p.IsValid s ↔ p ≤ s.rawEndPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUTF8 :=
224298
fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩, fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩⟩
@@ -371,9 +445,13 @@ theorem Slice.byteIdx_rawEndPos {s : Slice} : s.rawEndPos.byteIdx = s.utf8ByteSi
371445

372446
/-- Criterion for validity of positions in string slices. -/
373447
structure Pos.Raw.IsValidForSlice (s : Slice) (p : Pos.Raw) : Prop where
374-
le_utf8ByteSize : p ≤ s.rawEndPos
448+
le_rawEndPos : p ≤ s.rawEndPos
375449
isValid_offsetBy : (p.offsetBy s.startInclusive.offset).IsValid s.str
376450

451+
theorem Pos.Raw.IsValidForSlice.le_utf8ByteSize {s : Slice} {p : Pos.Raw}
452+
(h : p.IsValidForSlice s) : p.byteIdx ≤ s.utf8ByteSize := by
453+
simpa [Pos.Raw.le_iff] using h.le_rawEndPos
454+
377455
/--
378456
Accesses the indicated byte in the UTF-8 encoding of a string slice.
379457
@@ -479,7 +557,7 @@ instance {s : Slice} (l r : s.Pos) : Decidable (l < r) :=
479557
@[inline, expose]
480558
def Slice.Pos.byte {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : UInt8 :=
481559
s.getUTF8Byte pos.offset (by
482-
have := pos.isValidForSlice.le_utf8ByteSize
560+
have := pos.isValidForSlice.le_rawEndPos
483561
simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff]
484562
omega)
485563

src/Init/Data/String/Pattern/Char.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharSearcher s
7171
obtain ⟨_, h1, h2, _⟩ := h'
7272
have h3 := Char.utf8Size_pos (it.internalState.currPos.get h1)
7373
have h4 := it.internalState.currPos.isValidForSlice.le_utf8ByteSize
74-
simp [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff] at h1 h2 h4
74+
simp [Pos.ext_iff, String.Pos.Raw.ext_iff] at h1 h2 h4
7575
omega
7676
· cases h'
7777
· cases h

src/Init/Data/String/Pattern/Pred.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharPredSearch
7373
obtain ⟨_, h1, h2, _⟩ := h'
7474
have h3 := Char.utf8Size_pos (it.internalState.currPos.get h1)
7575
have h4 := it.internalState.currPos.isValidForSlice.le_utf8ByteSize
76-
simp [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff] at h1 h2 h4
76+
simp [Pos.ext_iff, String.Pos.Raw.ext_iff] at h1 h2 h4
7777
omega
7878
· cases h'
7979
· cases h

src/Init/Data/String/Pattern/String.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ private def finitenessRelation :
177177
apply Prod.Lex.right'
178178
· simp
179179
· have haux := np.isValidForSlice.le_utf8ByteSize
180-
simp [Slice.Pos.lt_iff, String.Pos.Raw.le_iff, String.Pos.Raw.lt_iff] at h1' haux ⊢
180+
simp [Slice.Pos.lt_iff, String.Pos.Raw.lt_iff] at h1' haux ⊢
181181
omega
182182
· apply Prod.Lex.left
183183
simp [h']

src/Init/Data/String/Slice.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,7 @@ private def finitenessRelation [Pure m] :
861861
obtain ⟨h1, h2, _⟩ := h'
862862
have h3 := Char.utf8Size_pos (it.internalState.currPos.get h1)
863863
have h4 := it.internalState.currPos.isValidForSlice.le_utf8ByteSize
864-
simp [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff] at h1 h2 h4
864+
simp [Pos.ext_iff, String.Pos.Raw.ext_iff] at h1 h2 h4
865865
omega
866866
· cases h'
867867
· cases h

0 commit comments

Comments
 (0)