Skip to content

Commit 1674295

Browse files
authored
refactor: redefine String.replace (#10986)
This PR defines `String.Slice.replace` and redefines `String.replace` to use the `Slice` version. The new implementation is generic in the pattern, so it supports things like `"education".replace isVowel "☃!" = "☃!d☃!c☃!t☃!☃!n"`. Since it uses the `ForwardSearcher` infrastructure, `String` patterns are searched using KMP, unlike the previous implementation which had quadratic runtime. As a side effect, the behavior when replacing an empty string now matches that of most other programming languages, namely `"abc".replace "" "k" = "kakbkck"`.
1 parent d427423 commit 1674295

File tree

10 files changed

+149
-29
lines changed

10 files changed

+149
-29
lines changed

src/Init/Data/String.lean

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ public import Init.Data.String.Substring
2222
public import Init.Data.String.TakeDrop
2323
public import Init.Data.String.Modify
2424
public import Init.Data.String.Termination
25+
public import Init.Data.String.ToSlice
26+
public import Init.Data.String.Search

src/Init/Data/String/Modify.lean

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -208,32 +208,6 @@ Examples:
208208
@[inline] def map (f : Char → Char) (s : String) : String :=
209209
mapAux f s s.startValidPos
210210

211-
/--
212-
In the string `s`, replaces all occurrences of `pattern` with `replacement`.
213-
214-
Examples:
215-
* `"red green blue".replace "e" "" = "rd grn blu"`
216-
* `"red green blue".replace "ee" "E" = "red grEn blue"`
217-
* `"red green blue".replace "e" "E" = "rEd grEEn bluE"`
218-
-/
219-
def replace (s pattern replacement : String) : String :=
220-
if h : pattern.rawEndPos.1 = 0 then s
221-
else
222-
have hPatt := Nat.zero_lt_of_ne_zero h
223-
let rec loop (acc : String) (accStop pos : String.Pos.Raw) :=
224-
if h : pos.byteIdx + pattern.rawEndPos.byteIdx > s.rawEndPos.byteIdx then
225-
acc ++ accStop.extract s s.rawEndPos
226-
else
227-
have := Nat.lt_of_lt_of_le (Nat.add_lt_add_left hPatt _) (Nat.ge_of_not_lt h)
228-
if Pos.Raw.substrEq s pos pattern 0 pattern.rawEndPos.byteIdx then
229-
have := Nat.sub_lt_sub_left this (Nat.add_lt_add_left hPatt _)
230-
loop (acc ++ accStop.extract s pos ++ replacement) (pos + pattern) (pos + pattern)
231-
else
232-
have := Nat.sub_lt_sub_left this (Pos.Raw.lt_next s pos)
233-
loop acc accStop (pos.next s)
234-
termination_by s.rawEndPos.1 - pos.1
235-
loop "" 0 0
236-
237211
/--
238212
Replaces each character in `s` with the result of applying `Char.toUpper` to it.
239213

src/Init/Data/String/Search.lean

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/-
2+
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
3+
Released under Apache 2.0 license as described in the file LICENSE.
4+
Authors: Markus Himmel
5+
-/
6+
module
7+
8+
prelude
9+
public import Init.Data.String.Slice
10+
11+
set_option doc.verso true
12+
13+
/-!
14+
# String operations based on slice operations
15+
16+
This file contains {name}`String` operations that are implemented by passing to
17+
{name}`String.Slice`.
18+
-/
19+
20+
public section
21+
22+
namespace String
23+
24+
section
25+
open String.Slice Pattern
26+
27+
variable {ρ : Type} {σ : Slice → Type}
28+
variable [∀ s, Std.Iterators.Iterator (σ s) Id (SearchStep s)]
29+
variable [∀ s, Std.Iterators.Finite (σ s) Id]
30+
variable [∀ s, Std.Iterators.IteratorLoop (σ s) Id Id]
31+
32+
33+
/--
34+
Constructs a new string obtained by replacing all occurrences of {name}`pattern` with
35+
{name}`replacement` in {name}`s`.
36+
37+
This function is generic over all currently supported patterns. The replacement may be a
38+
{name}`String` or a {name}`String.Slice`.
39+
40+
Examples:
41+
* {lean}`"red green blue".replace 'e' "" = "rd grn blu"`
42+
* {lean}`"red green blue".replace (fun c => c == 'u' || c == 'e') "" = "rd grn bl"`
43+
* {lean}`"red green blue".replace "e" "" = "rd grn blu"`
44+
* {lean}`"red green blue".replace "ee" "E" = "red grEn blue"`
45+
* {lean}`"red green blue".replace "e" "E" = "rEd grEEn bluE"`
46+
* {lean}`"aaaaa".replace "aa" "b" = "bba"`
47+
* {lean}`"abc".replace "" "k" = "kakbkck"`
48+
-/
49+
@[inline]
50+
def replace [ToForwardSearcher ρ σ] [ToSlice α] (s : String) (pattern : ρ)
51+
(replacement : α) : String :=
52+
s.toSlice.replace pattern replacement
53+
54+
end
55+
56+
end String

src/Init/Data/String/Slice.lean

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ prelude
99
public import Init.Data.String.Pattern
1010
public import Init.Data.Ord.Basic
1111
public import Init.Data.Iterators.Combinators.FilterMap
12+
public import Init.Data.String.ToSlice
1213

1314
set_option doc.verso true
1415

@@ -36,6 +37,12 @@ public section
3637

3738
namespace String.Slice
3839

40+
instance : HAppend String String.Slice String where
41+
-- This implementation performs an unnecessary copy which could be avoided by providing a custom
42+
-- C++ implementation for this instance. Note: if `String` had no custom runtime representation
43+
-- at all, then this would be very easy to get right from Lean using `ByteArray.copySlice`.
44+
hAppend s t := s ++ t.copy
45+
3946
open Pattern
4047

4148
/--
@@ -349,6 +356,28 @@ Examples:
349356
def dropPrefix [ForwardPattern ρ] (s : Slice) (pat : ρ) : Slice :=
350357
dropPrefix? s pat |>.getD s
351358

359+
/--
360+
Constructs a new string obtained by replacing all occurrences of {name}`pattern` with
361+
{name}`replacement` in {name}`s`.
362+
363+
This function is generic over all currently supported patterns. The replacement may be a
364+
{name}`String` or a {name}`String.Slice`.
365+
366+
Examples:
367+
* {lean}`"red green blue".toSlice.replace 'e' "" = "rd grn blu"`
368+
* {lean}`"red green blue".toSlice.replace (fun c => c == 'u' || c == 'e') "" = "rd grn bl"`
369+
* {lean}`"red green blue".toSlice.replace "e" "" = "rd grn blu"`
370+
* {lean}`"red green blue".toSlice.replace "ee" "E" = "red grEn blue"`
371+
* {lean}`"red green blue".toSlice.replace "e" "E" = "rEd grEEn bluE"`
372+
* {lean}`"aaaaa".toSlice.replace "aa" "b" = "bba"`
373+
* {lean}`"abc".toSlice.replace "" "k" = "kakbkck"`
374+
-/
375+
def replace [ToForwardSearcher ρ σ] [ToSlice α] (s : Slice) (pattern : ρ) (replacement : α) :
376+
String :=
377+
(ToForwardSearcher.toSearcher s pattern).fold (init := "") (fun
378+
| sofar, .matched .. => sofar ++ ToSlice.toSlice replacement
379+
| sofar, .rejected start stop => sofar ++ s.replaceStartEnd! start stop)
380+
352381
/--
353382
Removes the specified number of characters (Unicode code points) from the start of the slice.
354383

src/Init/Data/String/ToSlice.lean

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/-
2+
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
3+
Released under Apache 2.0 license as described in the file LICENSE.
4+
Authors: Markus Himmel
5+
-/
6+
module
7+
8+
prelude
9+
public import Init.Data.String.Defs
10+
11+
/-!
12+
# The `ToSlice` typeclass
13+
-/
14+
15+
public section
16+
17+
namespace String
18+
19+
/--
20+
Typeclass for types that have a conversion function to `String.Slice`. This typeclass is used to
21+
make some `String`/`String.Slice` functions polymorphic. As such, for now it is not intended that
22+
there instances of this beyond `ToSlice String` and `ToSlice String.Slice`.
23+
24+
To convert arbitrary data into a string representation, see `ToString` and `Repr`.
25+
-/
26+
class ToSlice (α : Type u) where
27+
toSlice : α → String.Slice
28+
29+
@[inline]
30+
instance : ToSlice String.Slice where
31+
toSlice := id
32+
33+
@[inline]
34+
instance : ToSlice String where
35+
toSlice := toSlice
36+
37+
end String

src/Lean/Compiler/FFI.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ module
77

88
prelude
99
public import Init.System.FilePath
10-
import Init.Data.String.Modify
10+
import Init.Data.String.Search
1111

1212
public section
1313

src/Lean/Data/Lsp/Utf16.lean

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ Authors: Marc Huisinga, Wojciech Nawrocki
77
module
88

99
prelude
10-
public import Init.Data.String
1110
public import Lean.Data.Lsp.BasicAux
1211
public import Lean.DeclarationRange
1312

src/Lean/DocString/Markdown.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import Init.Data.Ord
1212
public import Lean.DocString.Types
1313
public import Init.Data.String.TakeDrop
1414
import Init.Data.String.Iterator
15-
public import Init.Data.String.Modify
15+
public import Init.Data.String.Search
1616

1717
set_option linter.missingDocs true
1818

tests/lean/run/string_replace.lean

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
module
2+
3+
def isVowel (c : Char) : Bool :=
4+
c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u'
5+
6+
#guard "education".replace isVowel "☃!" = "☃!d☃!c☃!t☃!☃!n"
7+
#guard "red green blue".replace "e" "" = "rd grn blu"
8+
#guard "red green blue".replace 'e' "" = "rd grn blu"
9+
#guard "red green blue".replace "ee" "E" = "red grEn blue"
10+
#guard "red green blue".replace "e" "E" = "rEd grEEn bluE"
11+
#guard "abc".replace "" "k" = "kakbkck"
12+
#guard "".replace "" "z" = "zℕz"
13+
#guard "𝔸".replace "" "z" = "z𝔸z"
14+
#guard "".replace "" "z" = "zvẑz"
15+
#guard "aaaaa".replace "aa" "b" = "bba"
16+
#guard "v̂fℚo".replace ("ℕfℚoℤ".toSlice.drop 1 |>.dropEnd 1) ("☃🔭🌌".toSlice.dropEnd 1 |>.drop 1) = "v̂🔭"
17+
#guard ("abcde".toSlice.drop 1).replace (· == 'c') "C" = "bCde"
18+
#guard (("ac bc cc cd".toSlice.split " ").map (·.replace 'c' "C") |>.toList) = ["aC", "bC", "CC", "Cd"]
19+
#guard "red green blue".replace (fun c => c == 'u' || c == 'e') "" = "rd grn bl"
20+
#guard "aab".replace "ab" "X" = "aX"
21+
#guard " ℚℚ\n ".replace "\n" "\n" = "\n "

tests/lean/run/string_slice.lean

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@ Tests for `String.Slice` functions
2323
#guard ("coffee tea water".toSlice.split ' ').toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice]
2424
#guard ("coffee tea water".toSlice.split " tea ").toList == ["coffee".toSlice, "water".toSlice]
2525
#guard ("baaab".toSlice.split "aa").toList == ["b".toSlice, "ab".toSlice]
26+
#guard ("aababaaba".toSlice.split "ab").toList == ["a".toSlice, "".toSlice, "a".toSlice, "a".toSlice]
2627

2728
#guard ("coffee tea water".toSlice.splitInclusive Char.isWhitespace).toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice]
2829
#guard ("coffee tea water".toSlice.splitInclusive ' ').toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice]
2930
#guard ("coffee tea water".toSlice.splitInclusive " tea ").toList == ["coffee tea ".toSlice, "water".toSlice]
3031
#guard ("a".toSlice.splitInclusive (fun (_ : Char) => true)).toList == ["a".toSlice]
3132
#guard ("baaab".toSlice.splitInclusive "aa").toList == ["baa".toSlice, "ab".toSlice]
33+
#guard ("aababaaba".toSlice.splitInclusive "ab").toList == ["aab".toSlice, "ab".toSlice, "aab".toSlice, "a".toSlice]
3234

3335
#guard "red green blue".toSlice.drop 4 == "green blue".toSlice
3436
#guard "red green blue".toSlice.drop 10 == "blue".toSlice

0 commit comments

Comments
 (0)