|
| 1 | +/- |
| 2 | +Copyright (c) 2016 Microsoft Corporation. All rights reserved. |
| 3 | +Released under Apache 2.0 license as described in the file LICENSE. |
| 4 | +Author: Leonardo de Moura, Mario Carneiro |
| 5 | +-/ |
| 6 | +module |
| 7 | + |
| 8 | +prelude |
| 9 | +public import Init.Data.String.Basic |
| 10 | + |
| 11 | +/-! |
| 12 | +# `String.Iterator` |
| 13 | +
|
| 14 | +This file contains `String.Iterator`, an outgoing API to be replaced by the iterator framework in |
| 15 | +a future release. |
| 16 | +-/ |
| 17 | + |
| 18 | +public section |
| 19 | + |
| 20 | +namespace String |
| 21 | + |
| 22 | +/-- |
| 23 | +An iterator over the characters (Unicode code points) in a `String`. Typically created by |
| 24 | +`String.iter`. |
| 25 | +
|
| 26 | +String iterators pair a string with a valid byte index. This allows efficient character-by-character |
| 27 | +processing of strings while avoiding the need to manually ensure that byte indices are used with the |
| 28 | +correct strings. |
| 29 | +
|
| 30 | +An iterator is *valid* if the position `i` is *valid* for the string `s`, meaning `0 ≤ i ≤ s.rawEndPos` |
| 31 | +and `i` lies on a UTF8 byte boundary. If `i = s.rawEndPos`, the iterator is at the end of the string. |
| 32 | +
|
| 33 | +Most operations on iterators return unspecified values if the iterator is not valid. The functions |
| 34 | +in the `String.Iterator` API rule out the creation of invalid iterators, with two exceptions: |
| 35 | +- `Iterator.next iter` is invalid if `iter` is already at the end of the string (`iter.atEnd` is |
| 36 | + `true`), and |
| 37 | +- `Iterator.forward iter n`/`Iterator.nextn iter n` is invalid if `n` is strictly greater than the |
| 38 | + number of remaining characters. |
| 39 | +-/ |
| 40 | +structure Iterator where |
| 41 | + /-- The string being iterated over. -/ |
| 42 | + s : String |
| 43 | + /-- The current UTF-8 byte position in the string `s`. |
| 44 | +
|
| 45 | + This position is not guaranteed to be valid for the string. If the position is not valid, then the |
| 46 | + current character is `(default : Char)`, similar to `String.get` on an invalid position. |
| 47 | + -/ |
| 48 | + i : Pos.Raw |
| 49 | + deriving DecidableEq, Inhabited |
| 50 | + |
| 51 | +/-- Creates an iterator at the beginning of the string. -/ |
| 52 | +@[inline] def mkIterator (s : String) : Iterator := |
| 53 | + ⟨s, 0⟩ |
| 54 | + |
| 55 | +@[inherit_doc mkIterator] |
| 56 | +abbrev iter := mkIterator |
| 57 | + |
| 58 | +/-- |
| 59 | +The size of a string iterator is the number of bytes remaining. |
| 60 | +
|
| 61 | +Recursive functions that iterate towards the end of a string will typically decrease this measure. |
| 62 | +-/ |
| 63 | +instance : SizeOf String.Iterator where |
| 64 | + sizeOf i := i.1.utf8ByteSize - i.2.byteIdx |
| 65 | + |
| 66 | +theorem Iterator.sizeOf_eq (i : String.Iterator) : sizeOf i = i.1.utf8ByteSize - i.2.byteIdx := |
| 67 | + rfl |
| 68 | + |
| 69 | +namespace Iterator |
| 70 | +@[inline, inherit_doc Iterator.s] |
| 71 | +def toString := Iterator.s |
| 72 | + |
| 73 | +/-- |
| 74 | +The number of UTF-8 bytes remaining in the iterator. |
| 75 | +-/ |
| 76 | +@[inline] def remainingBytes : Iterator → Nat |
| 77 | + | ⟨s, i⟩ => s.rawEndPos.byteIdx - i.byteIdx |
| 78 | + |
| 79 | +@[inline, inherit_doc Iterator.i] |
| 80 | +def pos := Iterator.i |
| 81 | + |
| 82 | +/-- |
| 83 | +Gets the character at the iterator's current position. |
| 84 | +
|
| 85 | +A run-time bounds check is performed. Use `String.Iterator.curr'` to avoid redundant bounds checks. |
| 86 | +
|
| 87 | +If the position is invalid, returns `(default : Char)`. |
| 88 | +-/ |
| 89 | +@[inline] def curr : Iterator → Char |
| 90 | + | ⟨s, i⟩ => i.get s |
| 91 | + |
| 92 | +/-- |
| 93 | +Moves the iterator's position forward by one character, unconditionally. |
| 94 | +
|
| 95 | +It is only valid to call this function if the iterator is not at the end of the string (i.e. |
| 96 | +if `Iterator.atEnd` is `false`); otherwise, the resulting iterator will be invalid. |
| 97 | +-/ |
| 98 | +@[inline] def next : Iterator → Iterator |
| 99 | + | ⟨s, i⟩ => ⟨s, i.next s⟩ |
| 100 | + |
| 101 | +/-- |
| 102 | +Moves the iterator's position backward by one character, unconditionally. |
| 103 | +
|
| 104 | +The position is not changed if the iterator is at the beginning of the string. |
| 105 | +-/ |
| 106 | +@[inline] def prev : Iterator → Iterator |
| 107 | + | ⟨s, i⟩ => ⟨s, i.prev s⟩ |
| 108 | + |
| 109 | +/-- |
| 110 | +Checks whether the iterator is past its string's last character. |
| 111 | +-/ |
| 112 | +@[inline] def atEnd : Iterator → Bool |
| 113 | + | ⟨s, i⟩ => i.byteIdx ≥ s.rawEndPos.byteIdx |
| 114 | + |
| 115 | +/-- |
| 116 | +Checks whether the iterator is at or before the string's last character. |
| 117 | +-/ |
| 118 | +@[inline] def hasNext : Iterator → Bool |
| 119 | + | ⟨s, i⟩ => i.byteIdx < s.rawEndPos.byteIdx |
| 120 | + |
| 121 | +/-- |
| 122 | +Checks whether the iterator is after the beginning of the string. |
| 123 | +-/ |
| 124 | +@[inline] def hasPrev : Iterator → Bool |
| 125 | + | ⟨_, i⟩ => i.byteIdx > 0 |
| 126 | + |
| 127 | +/-- |
| 128 | +Gets the character at the iterator's current position. |
| 129 | +
|
| 130 | +The proof of `it.hasNext` ensures that there is, in fact, a character at the current position. This |
| 131 | +function is faster that `String.Iterator.curr` due to avoiding a run-time bounds check. |
| 132 | +-/ |
| 133 | +@[inline] def curr' (it : Iterator) (h : it.hasNext) : Char := |
| 134 | + match it with |
| 135 | + | ⟨s, i⟩ => i.get' s (by simpa only [hasNext, rawEndPos, decide_eq_true_eq, Pos.Raw.atEnd, ge_iff_le, Nat.not_le] using h) |
| 136 | + |
| 137 | +/-- |
| 138 | +Moves the iterator's position forward by one character, unconditionally. |
| 139 | +
|
| 140 | +The proof of `it.hasNext` ensures that there is, in fact, a position that's one character forwards. |
| 141 | +This function is faster that `String.Iterator.next` due to avoiding a run-time bounds check. |
| 142 | +-/ |
| 143 | +@[inline] def next' (it : Iterator) (h : it.hasNext) : Iterator := |
| 144 | + match it with |
| 145 | + | ⟨s, i⟩ => ⟨s, i.next' s (by simpa only [hasNext, rawEndPos, decide_eq_true_eq, Pos.Raw.atEnd, ge_iff_le, Nat.not_le] using h)⟩ |
| 146 | + |
| 147 | +/-- |
| 148 | +Replaces the current character in the string. |
| 149 | +
|
| 150 | +Does nothing if the iterator is at the end of the string. If both the replacement character and the |
| 151 | +replaced character are 7-bit ASCII characters and the string is not shared, then it is updated |
| 152 | +in-place and not copied. |
| 153 | +-/ |
| 154 | +@[inline] def setCurr : Iterator → Char → Iterator |
| 155 | + | ⟨s, i⟩, c => ⟨i.set s c, i⟩ |
| 156 | + |
| 157 | +/-- |
| 158 | +Moves the iterator's position to the end of the string, just past the last character. |
| 159 | +-/ |
| 160 | +@[inline] def toEnd : Iterator → Iterator |
| 161 | + | ⟨s, _⟩ => ⟨s, s.rawEndPos⟩ |
| 162 | + |
| 163 | +/-- |
| 164 | +Extracts the substring between the positions of two iterators. The first iterator's position is the |
| 165 | +start of the substring, and the second iterator's position is the end. |
| 166 | +
|
| 167 | +Returns the empty string if the iterators are for different strings, or if the position of the first |
| 168 | +iterator is past the position of the second iterator. |
| 169 | +-/ |
| 170 | +@[inline] def extract : Iterator → Iterator → String |
| 171 | + | ⟨s₁, b⟩, ⟨s₂, e⟩ => |
| 172 | + if s₁ ≠ s₂ || b > e then "" |
| 173 | + else b.extract s₁ e |
| 174 | + |
| 175 | +/-- |
| 176 | +Moves the iterator's position forward by the specified number of characters. |
| 177 | +
|
| 178 | +The resulting iterator is only valid if the number of characters to skip is less than or equal |
| 179 | +to the number of characters left in the iterator. |
| 180 | +-/ |
| 181 | +def forward : Iterator → Nat → Iterator |
| 182 | + | it, 0 => it |
| 183 | + | it, n+1 => forward it.next n |
| 184 | + |
| 185 | +/-- |
| 186 | +The remaining characters in an iterator, as a string. |
| 187 | +-/ |
| 188 | +@[inline] def remainingToString : Iterator → String |
| 189 | + | ⟨s, i⟩ => i.extract s s.rawEndPos |
| 190 | + |
| 191 | +@[inherit_doc forward] |
| 192 | +def nextn : Iterator → Nat → Iterator |
| 193 | + | it, 0 => it |
| 194 | + | it, i+1 => nextn it.next i |
| 195 | + |
| 196 | +/-- |
| 197 | +Moves the iterator's position back by the specified number of characters, stopping at the beginning |
| 198 | +of the string. |
| 199 | +-/ |
| 200 | +def prevn : Iterator → Nat → Iterator |
| 201 | + | it, 0 => it |
| 202 | + | it, i+1 => prevn it.prev i |
| 203 | + |
| 204 | +end Iterator |
| 205 | + |
| 206 | +end String |
0 commit comments