Skip to content

Commit 5a5f8c4

Browse files
authored
perf: unbundle needle from char/pred pattern (#11376)
This PR aims to improve the performance of `String.contains`, `String.find`, etc. when using patterns of type `Char` or `Char -> Bool` by moving the needle out of the iterator state and thus working around missing unboxing in the compiler.
1 parent e8d35a1 commit 5a5f8c4

File tree

2 files changed

+29
-33
lines changed

2 files changed

+29
-33
lines changed

src/Init/Data/String/Pattern/Char.lean

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,65 +21,63 @@ public section
2121

2222
namespace String.Slice.Pattern
2323

24-
structure ForwardCharSearcher (s : Slice) where
24+
structure ForwardCharSearcher (needle : Char) (s : Slice) where
2525
currPos : s.Pos
26-
needle : Char
2726
deriving Inhabited
2827

2928
namespace ForwardCharSearcher
3029

3130
@[inline]
32-
def iter (c : Char) (s : Slice) : Std.Iter (α := ForwardCharSearcher s) (SearchStep s) :=
33-
{ internalState := { currPos := s.startPos, needle := c }}
31+
def iter (c : Char) (s : Slice) : Std.Iter (α := ForwardCharSearcher c s) (SearchStep s) :=
32+
{ internalState := { currPos := s.startPos }}
3433

35-
instance (s : Slice) : Std.Iterators.Iterator (ForwardCharSearcher s) Id (SearchStep s) where
34+
instance (s : Slice) : Std.Iterators.Iterator (ForwardCharSearcher c s) Id (SearchStep s) where
3635
IsPlausibleStep it
3736
| .yield it' out =>
38-
it.internalState.needle = it'.internalState.needle ∧
3937
∃ h1 : it.internalState.currPos ≠ s.endPos,
4038
it'.internalState.currPos = it.internalState.currPos.next h1 ∧
4139
match out with
4240
| .matched startPos endPos =>
4341
it.internalState.currPos = startPos ∧
4442
it'.internalState.currPos = endPos ∧
45-
it.internalState.currPos.get h1 = it.internalState.needle
43+
it.internalState.currPos.get h1 = c
4644
| .rejected startPos endPos =>
4745
it.internalState.currPos = startPos ∧
4846
it'.internalState.currPos = endPos ∧
49-
it.internalState.currPos.get h1 ≠ it.internalState.needle
47+
it.internalState.currPos.get h1 ≠ c
5048
| .skip _ => False
5149
| .done => it.internalState.currPos = s.endPos
52-
step := fun ⟨currPos, needle⟩ =>
50+
step := fun ⟨⟨currPos⟩⟩ =>
5351
if h1 : currPos = s.endPos then
5452
pure (.deflate ⟨.done, by simp [h1]⟩)
5553
else
5654
let nextPos := currPos.next h1
57-
let nextIt := ⟨nextPos, needle⟩
58-
if h2 : currPos.get h1 = needle then
55+
let nextIt := ⟨⟨nextPos⟩⟩
56+
if h2 : currPos.get h1 = c then
5957
pure (.deflate ⟨.yield nextIt (.matched currPos nextPos), by simp [h1, h2, nextIt, nextPos]⟩)
6058
else
6159
pure (.deflate ⟨.yield nextIt (.rejected currPos nextPos), by simp [h1, h2, nextIt, nextPos]⟩)
6260

63-
def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharSearcher s) Id where
61+
def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharSearcher s c) Id where
6462
rel := InvImage WellFoundedRelation.rel (fun it => it.internalState.currPos)
6563
wf := InvImage.wf _ WellFoundedRelation.wf
6664
subrelation {it it'} h := by
6765
simp_wf
6866
obtain ⟨step, h, h'⟩ := h
6967
cases step
7068
· cases h
71-
obtain ⟨_, h1, h2, _⟩ := h'
69+
obtain ⟨_, h2, _⟩ := h'
7270
simp [h2]
7371
· cases h'
7472
· cases h
7573

76-
instance : Std.Iterators.Finite (ForwardCharSearcher s) Id :=
74+
instance : Std.Iterators.Finite (ForwardCharSearcher s c) Id :=
7775
.of_finitenessRelation finitenessRelation
7876

79-
instance : Std.Iterators.IteratorLoop (ForwardCharSearcher s) Id Id :=
77+
instance : Std.Iterators.IteratorLoop (ForwardCharSearcher s c) Id Id :=
8078
.defaultImplementation
8179

82-
instance {c : Char} : ToForwardSearcher c ForwardCharSearcher where
80+
instance {c : Char} : ToForwardSearcher c (ForwardCharSearcher c) where
8381
toSearcher := iter c
8482

8583
instance {c : Char} : ForwardPattern c := .defaultImplementation

src/Init/Data/String/Pattern/Pred.lean

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,71 +22,69 @@ public section
2222

2323
namespace String.Slice.Pattern
2424

25-
structure ForwardCharPredSearcher (s : Slice) where
25+
structure ForwardCharPredSearcher (p : Char → Bool) (s : Slice) where
2626
currPos : s.Pos
27-
needle : Char → Bool
2827
deriving Inhabited
2928

3029
namespace ForwardCharPredSearcher
3130

3231
@[inline]
33-
def iter (p : Char → Bool) (s : Slice) : Std.Iter (α := ForwardCharPredSearcher s) (SearchStep s) :=
34-
{ internalState := { currPos := s.startPos, needle := p }}
32+
def iter (p : Char → Bool) (s : Slice) : Std.Iter (α := ForwardCharPredSearcher p s) (SearchStep s) :=
33+
{ internalState := { currPos := s.startPos }}
3534

36-
instance (s : Slice) : Std.Iterators.Iterator (ForwardCharPredSearcher s) Id (SearchStep s) where
35+
instance (s : Slice) : Std.Iterators.Iterator (ForwardCharPredSearcher p s) Id (SearchStep s) where
3736
IsPlausibleStep it
3837
| .yield it' out =>
39-
it.internalState.needle = it'.internalState.needle ∧
4038
∃ h1 : it.internalState.currPos ≠ s.endPos,
4139
it'.internalState.currPos = it.internalState.currPos.next h1 ∧
4240
match out with
4341
| .matched startPos endPos =>
4442
it.internalState.currPos = startPos ∧
4543
it'.internalState.currPos = endPos ∧
46-
it.internalState.needle (it.internalState.currPos.get h1)
44+
p (it.internalState.currPos.get h1)
4745
| .rejected startPos endPos =>
4846
it.internalState.currPos = startPos ∧
4947
it'.internalState.currPos = endPos ∧
50-
¬ it.internalState.needle (it.internalState.currPos.get h1)
48+
¬ p (it.internalState.currPos.get h1)
5149
| .skip _ => False
5250
| .done => it.internalState.currPos = s.endPos
53-
step := fun ⟨currPos, needle⟩ =>
51+
step := fun ⟨⟨currPos⟩⟩ =>
5452
if h1 : currPos = s.endPos then
5553
pure (.deflate ⟨.done, by simp [h1]⟩)
5654
else
5755
let nextPos := currPos.next h1
58-
let nextIt := ⟨nextPos, needle⟩
59-
if h2 : needle <| currPos.get h1 then
56+
let nextIt := ⟨⟨nextPos⟩⟩
57+
if h2 : p <| currPos.get h1 then
6058
pure (.deflate ⟨.yield nextIt (.matched currPos nextPos), by simp [h1, h2, nextPos, nextIt]⟩)
6159
else
6260
pure (.deflate ⟨.yield nextIt (.rejected currPos nextPos), by simp [h1, h2, nextPos, nextIt]⟩)
6361

6462

65-
def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharPredSearcher s) Id where
63+
def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharPredSearcher p s) Id where
6664
rel := InvImage WellFoundedRelation.rel (fun it => it.internalState.currPos)
6765
wf := InvImage.wf _ WellFoundedRelation.wf
6866
subrelation {it it'} h := by
6967
simp_wf
7068
obtain ⟨step, h, h'⟩ := h
7169
cases step
7270
· cases h
73-
obtain ⟨_, h1, h2, _⟩ := h'
71+
obtain ⟨_, h2, _⟩ := h'
7472
simp [h2]
7573
· cases h'
7674
· cases h
7775

78-
instance : Std.Iterators.Finite (ForwardCharPredSearcher s) Id :=
76+
instance : Std.Iterators.Finite (ForwardCharPredSearcher p s) Id :=
7977
.of_finitenessRelation finitenessRelation
8078

81-
instance : Std.Iterators.IteratorLoop (ForwardCharPredSearcher s) Id Id :=
79+
instance : Std.Iterators.IteratorLoop (ForwardCharPredSearcher p s) Id Id :=
8280
.defaultImplementation
8381

84-
instance {p : Char → Bool} : ToForwardSearcher p ForwardCharPredSearcher where
82+
instance {p : Char → Bool} : ToForwardSearcher p (ForwardCharPredSearcher p) where
8583
toSearcher := iter p
8684

8785
instance {p : Char → Bool} : ForwardPattern p := .defaultImplementation
8886

89-
instance {p : Char → Prop} [DecidablePred p] : ToForwardSearcher p ForwardCharPredSearcher where
87+
instance {p : Char → Prop} [DecidablePred p] : ToForwardSearcher p (ForwardCharPredSearcher p) where
9088
toSearcher := iter (decide <| p ·)
9189

9290
instance {p : Char → Prop} [DecidablePred p] : ForwardPattern p :=

0 commit comments

Comments
 (0)