Skip to content

Commit 52b687c

Browse files
authored
perf: less allocations when using string patterns (#11255)
This PR reduces the allocations when using string patterns. In particular `startsWith`, `dropPrefix?`, `endsWith`, `dropSuffix?` are optimized.
1 parent 75570f3 commit 52b687c

File tree

8 files changed

+41
-29
lines changed

8 files changed

+41
-29
lines changed

src/Init/Data/String/Defs.lean

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ theorem endExclusive_toSlice {s : String} : s.toSlice.endExclusive = s.endValidP
435435
theorem str_toSlice {s : String} : s.toSlice.str = s := rfl
436436

437437
/-- The number of bytes of the UTF-8 encoding of the string slice. -/
438-
@[expose]
438+
@[expose, inline]
439439
def Slice.utf8ByteSize (s : Slice) : Nat :=
440440
s.startInclusive.offset.byteDistance s.endExclusive.offset
441441

@@ -464,7 +464,7 @@ theorem Pos.Raw.byteIdx_sub_slice {p : Pos.Raw} {s : Slice} :
464464
(p - s).byteIdx = p.byteIdx - s.utf8ByteSize := rfl
465465

466466
/-- The end position of a slice, as a `Pos.Raw`. -/
467-
@[expose]
467+
@[expose, inline]
468468
def Slice.rawEndPos (s : Slice) : Pos.Raw where
469469
byteIdx := s.utf8ByteSize
470470

src/Init/Data/String/Pattern/Basic.lean

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ class ForwardPattern (ρ : Type) where
7474

7575
namespace Internal
7676

77-
@[extern "lean_slice_memcmp"]
78-
def memcmp (lhs rhs : @& Slice) (lstart : @& String.Pos.Raw) (rstart : @& String.Pos.Raw)
77+
@[extern "lean_string_memcmp"]
78+
def memcmpStr (lhs rhs : @& String) (lstart : @& String.Pos.Raw) (rstart : @& String.Pos.Raw)
7979
(len : @& String.Pos.Raw) (h1 : len.offsetBy lstart ≤ lhs.rawEndPos)
8080
(h2 : len.offsetBy rstart ≤ rhs.rawEndPos) : Bool :=
8181
go 0
@@ -99,6 +99,27 @@ where
9999
simp [Pos.Raw.lt_iff] at h ⊢
100100
omega
101101

102+
@[inline]
103+
def memcmpSlice (lhs rhs : Slice) (lstart : String.Pos.Raw) (rstart : String.Pos.Raw)
104+
(len : String.Pos.Raw) (h1 : len.offsetBy lstart ≤ lhs.rawEndPos)
105+
(h2 : len.offsetBy rstart ≤ rhs.rawEndPos) : Bool :=
106+
memcmpStr
107+
lhs.str
108+
rhs.str
109+
(lstart.offsetBy lhs.startInclusive.offset)
110+
(rstart.offsetBy rhs.startInclusive.offset)
111+
len
112+
(by
113+
have := lhs.startInclusive_le_endExclusive
114+
have := lhs.endExclusive.isValid.le_utf8ByteSize
115+
simp [ValidPos.le_iff, Pos.Raw.le_iff, Slice.utf8ByteSize_eq] at *
116+
omega)
117+
(by
118+
have := rhs.startInclusive_le_endExclusive
119+
have := rhs.endExclusive.isValid.le_utf8ByteSize
120+
simp [ValidPos.le_iff, Pos.Raw.le_iff, Slice.utf8ByteSize_eq] at *
121+
omega)
122+
102123
end Internal
103124

104125
namespace ForwardPattern

src/Init/Data/String/Pattern/String.lean

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ def startsWith (s : Slice) (pat : Slice) : Bool :=
270270
omega
271271
have hp := by
272272
simp [Pos.Raw.le_iff]
273-
Internal.memcmp s pat s.startPos.offset pat.startPos.offset pat.rawEndPos hs hp
273+
Internal.memcmpSlice s pat s.startPos.offset pat.startPos.offset pat.rawEndPos hs hp
274274
else
275275
false
276276

@@ -306,7 +306,7 @@ def endsWith (s : Slice) (pat : Slice) : Bool :=
306306
omega
307307
have hp := by
308308
simp [patStart, Pos.Raw.le_iff] at h ⊢
309-
Internal.memcmp s pat sStart patStart pat.rawEndPos hs hp
309+
Internal.memcmpSlice s pat sStart patStart pat.rawEndPos hs hp
310310
else
311311
false
312312

src/Init/Data/String/Slice.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def beq (s1 s2 : Slice) : Bool :=
6868
if h : s1.utf8ByteSize = s2.utf8ByteSize then
6969
have h1 := by simp [h, String.Pos.Raw.le_iff]
7070
have h2 := by simp [h, String.Pos.Raw.le_iff]
71-
Internal.memcmp s1 s2 s1.startPos.offset s2.startPos.offset s1.rawEndPos h1 h2
71+
Internal.memcmpSlice s1 s2 s1.startPos.offset s2.startPos.offset s1.rawEndPos h1 h2
7272
else
7373
false
7474

src/include/lean/lean.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1163,7 +1163,7 @@ static inline uint8_t lean_string_dec_eq(b_lean_obj_arg s1, b_lean_obj_arg s2) {
11631163
static inline uint8_t lean_string_dec_lt(b_lean_obj_arg s1, b_lean_obj_arg s2) { return lean_string_lt(s1, s2); }
11641164
LEAN_EXPORT uint64_t lean_string_hash(b_lean_obj_arg);
11651165
LEAN_EXPORT lean_obj_res lean_string_of_usize(size_t);
1166-
LEAN_EXPORT uint8_t lean_slice_memcmp(b_lean_obj_arg s1, b_lean_obj_arg s2, b_lean_obj_arg lstart, b_lean_obj_arg rstart, b_lean_obj_arg len);
1166+
LEAN_EXPORT uint8_t lean_string_memcmp(b_lean_obj_arg s1, b_lean_obj_arg s2, b_lean_obj_arg lstart, b_lean_obj_arg rstart, b_lean_obj_arg len);
11671167
LEAN_EXPORT uint64_t lean_slice_hash(b_lean_obj_arg);
11681168
LEAN_EXPORT uint8_t lean_slice_dec_lt(b_lean_obj_arg s1, b_lean_obj_arg s2);
11691169

src/lake/Lake/Util/FilePath.lean

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,4 @@ where
8282
removeExts s i' e
8383
termination_by i.1
8484

85-
-- sanity check
86-
example :
87-
modOfFilePath "Foo/Bar" = `Foo.Bar
88-
∧ modOfFilePath "Foo/Bar/" = `Foo.Bar
89-
∧ modOfFilePath "Foo/Bar.lean" = `Foo.Bar
90-
∧ modOfFilePath "Foo/Bar.tar.gz" = `Foo.Bar
91-
∧ modOfFilePath "Foo/Bar.lean/" = `Foo.«Bar.lean»
92-
:= by native_decide
93-
9485
attribute [deprecated "Deprecated without replacement." (since := "2025-08-01")] modOfFilePath

src/runtime/object.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2362,6 +2362,17 @@ extern "C" LEAN_EXPORT obj_res lean_string_of_usize(size_t n) {
23622362
return mk_ascii_string_unchecked(std::to_string(n));
23632363
}
23642364

2365+
extern "C" LEAN_EXPORT uint8_t lean_string_memcmp(b_obj_arg s1, b_obj_arg s2, b_obj_arg lstart, b_obj_arg rstart, b_obj_arg len) {
2366+
// Thanks to the proof arguments we know that lstart, rstart and len are all scalars.
2367+
lean_assert(lean_is_scalar(lstart));
2368+
lean_assert(lean_is_scalar(rstart));
2369+
lean_assert(lean_is_scalar(len));
2370+
2371+
char const * lbase = lean_string_cstr(s1) + lean_unbox(lstart);
2372+
char const * rbase = lean_string_cstr(s2) + lean_unbox(rstart);
2373+
return std::memcmp(lbase, rbase, lean_unbox(len)) == 0;
2374+
}
2375+
23652376
size_t lean_slice_size(b_obj_arg slice) {
23662377
b_obj_res start = lean_ctor_get(slice, 1);
23672378
lean_assert(lean_is_scalar(start));
@@ -2377,17 +2388,6 @@ char const * lean_slice_base(b_obj_arg slice) {
23772388
return lean_string_cstr(string) + lean_unbox(offset);
23782389
}
23792390

2380-
extern "C" LEAN_EXPORT uint8_t lean_slice_memcmp(b_obj_arg s1, b_obj_arg s2, b_obj_arg lstart, b_obj_arg rstart, b_obj_arg len) {
2381-
// Thanks to the proof arguments we know that lstart, rstart and len are all scalars.
2382-
lean_assert(lean_is_scalar(lstart));
2383-
lean_assert(lean_is_scalar(rstart));
2384-
lean_assert(lean_is_scalar(len));
2385-
2386-
char const * lbase = lean_slice_base(s1) + lean_unbox(lstart);
2387-
char const * rbase = lean_slice_base(s2) + lean_unbox(rstart);
2388-
return std::memcmp(lbase, rbase, lean_unbox(len)) == 0;
2389-
}
2390-
23912391
extern "C" LEAN_EXPORT uint64_t lean_slice_hash(b_obj_arg s) {
23922392
size_t sz = lean_slice_size(s);
23932393
char const * str = lean_slice_base(s);

stage0/src/stdlib_flags.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ options get_default_options() {
1111
opts = opts.update({"debug", "terminalTacticsAsSorry"}, false);
1212
// switch to `true` for ABI-breaking changes affecting meta code;
1313
// see also next option!
14-
opts = opts.update({"interpreter", "prefer_native"}, false);
14+
opts = opts.update({"interpreter", "prefer_native"}, true);
1515
// switch to `false` when enabling `prefer_native` should also affect use
1616
// of built-in parsers in quotations; this is usually the case, but setting
1717
// both to `true` may be necessary for handling non-builtin parsers with

0 commit comments

Comments
 (0)