Skip to content

Commit b5dc11e

Browse files
authored
chore: move some material out of Init.Data.String.Basic (#10893)
This PR splits some low-hanging fruit out of `Init.Data.String.Basic`: basic material about `String.Pos.Raw`, `String.Substrig`, and `String.Iterator`. More splitting required and the remaining material is quite unorganized, but it's a start.
1 parent 08bc333 commit b5dc11e

40 files changed

+1507
-1376
lines changed

src/Init/Data/String.lean

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,6 @@ public import Init.Data.String.Bootstrap
1616
public import Init.Data.String.Slice
1717
public import Init.Data.String.Pattern
1818
public import Init.Data.String.Stream
19+
public import Init.Data.String.PosRaw
20+
public import Init.Data.String.Substring
21+
public import Init.Data.String.TakeDrop

src/Init/Data/String/Basic.lean

Lines changed: 59 additions & 1369 deletions
Large diffs are not rendered by default.

src/Init/Data/String/Extra.lean

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,24 @@ prelude
99
import all Init.Data.ByteArray.Basic
1010
public import Init.Data.String.Basic
1111
import all Init.Data.String.Basic
12+
public import Init.Data.String.Iterator
13+
import all Init.Data.String.Iterator
14+
public import Init.Data.String.Substring
1215

1316
public section
1417

18+
namespace Substring
19+
20+
/--
21+
Returns an iterator into the underlying string, at the substring's starting position. The ending
22+
position is discarded, so the iterator alone cannot be used to determine whether its current
23+
position is within the original substring.
24+
-/
25+
@[inline] def toIterator : Substring → String.Iterator
26+
| ⟨s, b, _⟩ => ⟨s, b⟩
27+
28+
end Substring
29+
1530
namespace String
1631

1732
/--

src/Init/Data/String/Iterator.lean

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
/-
2+
Copyright (c) 2016 Microsoft Corporation. All rights reserved.
3+
Released under Apache 2.0 license as described in the file LICENSE.
4+
Author: Leonardo de Moura, Mario Carneiro
5+
-/
6+
module
7+
8+
prelude
9+
public import Init.Data.String.Basic
10+
11+
/-!
12+
# `String.Iterator`
13+
14+
This file contains `String.Iterator`, an outgoing API to be replaced by the iterator framework in
15+
a future release.
16+
-/
17+
18+
public section
19+
20+
namespace String
21+
22+
/--
23+
An iterator over the characters (Unicode code points) in a `String`. Typically created by
24+
`String.iter`.
25+
26+
String iterators pair a string with a valid byte index. This allows efficient character-by-character
27+
processing of strings while avoiding the need to manually ensure that byte indices are used with the
28+
correct strings.
29+
30+
An iterator is *valid* if the position `i` is *valid* for the string `s`, meaning `0 ≤ i ≤ s.rawEndPos`
31+
and `i` lies on a UTF8 byte boundary. If `i = s.rawEndPos`, the iterator is at the end of the string.
32+
33+
Most operations on iterators return unspecified values if the iterator is not valid. The functions
34+
in the `String.Iterator` API rule out the creation of invalid iterators, with two exceptions:
35+
- `Iterator.next iter` is invalid if `iter` is already at the end of the string (`iter.atEnd` is
36+
`true`), and
37+
- `Iterator.forward iter n`/`Iterator.nextn iter n` is invalid if `n` is strictly greater than the
38+
number of remaining characters.
39+
-/
40+
structure Iterator where
41+
/-- The string being iterated over. -/
42+
s : String
43+
/-- The current UTF-8 byte position in the string `s`.
44+
45+
This position is not guaranteed to be valid for the string. If the position is not valid, then the
46+
current character is `(default : Char)`, similar to `String.get` on an invalid position.
47+
-/
48+
i : Pos.Raw
49+
deriving DecidableEq, Inhabited
50+
51+
/-- Creates an iterator at the beginning of the string. -/
52+
@[inline] def mkIterator (s : String) : Iterator :=
53+
⟨s, 0
54+
55+
@[inherit_doc mkIterator]
56+
abbrev iter := mkIterator
57+
58+
/--
59+
The size of a string iterator is the number of bytes remaining.
60+
61+
Recursive functions that iterate towards the end of a string will typically decrease this measure.
62+
-/
63+
instance : SizeOf String.Iterator where
64+
sizeOf i := i.1.utf8ByteSize - i.2.byteIdx
65+
66+
theorem Iterator.sizeOf_eq (i : String.Iterator) : sizeOf i = i.1.utf8ByteSize - i.2.byteIdx :=
67+
rfl
68+
69+
namespace Iterator
70+
@[inline, inherit_doc Iterator.s]
71+
def toString := Iterator.s
72+
73+
/--
74+
The number of UTF-8 bytes remaining in the iterator.
75+
-/
76+
@[inline] def remainingBytes : Iterator → Nat
77+
| ⟨s, i⟩ => s.rawEndPos.byteIdx - i.byteIdx
78+
79+
@[inline, inherit_doc Iterator.i]
80+
def pos := Iterator.i
81+
82+
/--
83+
Gets the character at the iterator's current position.
84+
85+
A run-time bounds check is performed. Use `String.Iterator.curr'` to avoid redundant bounds checks.
86+
87+
If the position is invalid, returns `(default : Char)`.
88+
-/
89+
@[inline] def curr : Iterator → Char
90+
| ⟨s, i⟩ => i.get s
91+
92+
/--
93+
Moves the iterator's position forward by one character, unconditionally.
94+
95+
It is only valid to call this function if the iterator is not at the end of the string (i.e.
96+
if `Iterator.atEnd` is `false`); otherwise, the resulting iterator will be invalid.
97+
-/
98+
@[inline] def next : Iterator → Iterator
99+
| ⟨s, i⟩ => ⟨s, i.next s⟩
100+
101+
/--
102+
Moves the iterator's position backward by one character, unconditionally.
103+
104+
The position is not changed if the iterator is at the beginning of the string.
105+
-/
106+
@[inline] def prev : Iterator → Iterator
107+
| ⟨s, i⟩ => ⟨s, i.prev s⟩
108+
109+
/--
110+
Checks whether the iterator is past its string's last character.
111+
-/
112+
@[inline] def atEnd : Iterator → Bool
113+
| ⟨s, i⟩ => i.byteIdx ≥ s.rawEndPos.byteIdx
114+
115+
/--
116+
Checks whether the iterator is at or before the string's last character.
117+
-/
118+
@[inline] def hasNext : Iterator → Bool
119+
| ⟨s, i⟩ => i.byteIdx < s.rawEndPos.byteIdx
120+
121+
/--
122+
Checks whether the iterator is after the beginning of the string.
123+
-/
124+
@[inline] def hasPrev : Iterator → Bool
125+
| ⟨_, i⟩ => i.byteIdx > 0
126+
127+
/--
128+
Gets the character at the iterator's current position.
129+
130+
The proof of `it.hasNext` ensures that there is, in fact, a character at the current position. This
131+
function is faster that `String.Iterator.curr` due to avoiding a run-time bounds check.
132+
-/
133+
@[inline] def curr' (it : Iterator) (h : it.hasNext) : Char :=
134+
match it with
135+
| ⟨s, i⟩ => i.get' s (by simpa only [hasNext, rawEndPos, decide_eq_true_eq, Pos.Raw.atEnd, ge_iff_le, Nat.not_le] using h)
136+
137+
/--
138+
Moves the iterator's position forward by one character, unconditionally.
139+
140+
The proof of `it.hasNext` ensures that there is, in fact, a position that's one character forwards.
141+
This function is faster that `String.Iterator.next` due to avoiding a run-time bounds check.
142+
-/
143+
@[inline] def next' (it : Iterator) (h : it.hasNext) : Iterator :=
144+
match it with
145+
| ⟨s, i⟩ => ⟨s, i.next' s (by simpa only [hasNext, rawEndPos, decide_eq_true_eq, Pos.Raw.atEnd, ge_iff_le, Nat.not_le] using h)⟩
146+
147+
/--
148+
Replaces the current character in the string.
149+
150+
Does nothing if the iterator is at the end of the string. If both the replacement character and the
151+
replaced character are 7-bit ASCII characters and the string is not shared, then it is updated
152+
in-place and not copied.
153+
-/
154+
@[inline] def setCurr : Iterator → Char → Iterator
155+
| ⟨s, i⟩, c => ⟨i.set s c, i⟩
156+
157+
/--
158+
Moves the iterator's position to the end of the string, just past the last character.
159+
-/
160+
@[inline] def toEnd : Iterator → Iterator
161+
| ⟨s, _⟩ => ⟨s, s.rawEndPos⟩
162+
163+
/--
164+
Extracts the substring between the positions of two iterators. The first iterator's position is the
165+
start of the substring, and the second iterator's position is the end.
166+
167+
Returns the empty string if the iterators are for different strings, or if the position of the first
168+
iterator is past the position of the second iterator.
169+
-/
170+
@[inline] def extract : Iterator → Iterator → String
171+
| ⟨s₁, b⟩, ⟨s₂, e⟩ =>
172+
if s₁ ≠ s₂ || b > e then ""
173+
else b.extract s₁ e
174+
175+
/--
176+
Moves the iterator's position forward by the specified number of characters.
177+
178+
The resulting iterator is only valid if the number of characters to skip is less than or equal
179+
to the number of characters left in the iterator.
180+
-/
181+
def forward : Iterator → Nat → Iterator
182+
| it, 0 => it
183+
| it, n+1 => forward it.next n
184+
185+
/--
186+
The remaining characters in an iterator, as a string.
187+
-/
188+
@[inline] def remainingToString : Iterator → String
189+
| ⟨s, i⟩ => i.extract s s.rawEndPos
190+
191+
@[inherit_doc forward]
192+
def nextn : Iterator → Nat → Iterator
193+
| it, 0 => it
194+
| it, i+1 => nextn it.next i
195+
196+
/--
197+
Moves the iterator's position back by the specified number of characters, stopping at the beginning
198+
of the string.
199+
-/
200+
def prevn : Iterator → Nat → Iterator
201+
| it, 0 => it
202+
| it, i+1 => prevn it.prev i
203+
204+
end Iterator
205+
206+
end String

0 commit comments

Comments
 (0)