Skip to content

Commit 151c034

Browse files
authored
refactor: rename String.bytes to String.toByteArray (#11343)
This PR renames `String.bytes` to `String.toByteArray`. This is for two reasons: first, `toByteArray` is a better name, and second, we have something else that wants to use the name `bytes`, namely the function that returns an iterator over the string's bytes.
1 parent 2308e3a commit 151c034

File tree

8 files changed

+125
-108
lines changed

8 files changed

+125
-108
lines changed

src/Init/Data/String/Basic.lean

Lines changed: 79 additions & 67 deletions
Large diffs are not rendered by default.

src/Init/Data/String/Defs.lean

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,11 @@ Encodes a string in UTF-8 as an array of bytes.
7474
-/
7575
@[extern "lean_string_to_utf8"]
7676
def String.toUTF8 (a : @& String) : ByteArray :=
77-
a.bytes
77+
a.toByteArray
7878

79-
@[simp] theorem String.toUTF8_eq_bytes {s : String} : s.toUTF8 = s.bytes := (rfl)
79+
@[simp] theorem String.toUTF8_eq_toByteArray {s : String} : s.toUTF8 = s.toByteArray := (rfl)
8080

81-
@[simp] theorem String.bytes_empty : "".bytes = ByteArray.empty := (rfl)
81+
@[simp] theorem String.toByteArray_empty : "".toByteArray = ByteArray.empty := (rfl)
8282

8383
/--
8484
Appends two strings. Usually accessed via the `++` operator.
@@ -92,33 +92,33 @@ Examples:
9292
-/
9393
@[extern "lean_string_append", expose]
9494
def String.append (s : String) (t : @& String) : String where
95-
bytes := s.bytes ++ t.bytes
95+
toByteArray := s.toByteArray ++ t.toByteArray
9696
isValidUTF8 := s.isValidUTF8.append t.isValidUTF8
9797

9898
instance : Append String where
9999
append s t := s.append t
100100

101101
@[simp]
102-
theorem String.bytes_append {s t : String} : (s ++ t).bytes = s.bytes ++ t.bytes := (rfl)
102+
theorem String.toByteArray_append {s t : String} : (s ++ t).toByteArray = s.toByteArray ++ t.toByteArray := (rfl)
103103

104-
theorem String.bytes_inj {s t : String} : s.bytes = t.bytes ↔ s = t := by
104+
theorem String.toByteArray_inj {s t : String} : s.toByteArray = t.toByteArray ↔ s = t := by
105105
refine ⟨fun h => ?_, (· ▸ rfl)⟩
106106
rcases s with ⟨s⟩
107107
rcases t with ⟨t⟩
108108
subst h
109109
rfl
110110

111-
@[simp] theorem String.bytes_ofList {l : List Char} : (String.ofList l).bytes = l.utf8Encode := by
111+
@[simp] theorem String.toByteArray_ofList {l : List Char} : (String.ofList l).toByteArray = l.utf8Encode := by
112112
simp [String.ofList]
113113

114-
@[deprecated String.bytes_ofList (since := "2025-10-30")]
115-
theorem List.bytes_asString {l : List Char} : (String.ofList l).bytes = l.utf8Encode :=
116-
String.bytes_ofList
114+
@[deprecated String.toByteArray_ofList (since := "2025-10-30")]
115+
theorem List.toByteArray_asString {l : List Char} : (String.ofList l).toByteArray = l.utf8Encode :=
116+
String.toByteArray_ofList
117117

118118
theorem String.exists_eq_ofList (s : String) :
119119
∃ l : List Char, s = String.ofList l := by
120120
rcases s with ⟨_, ⟨l, rfl⟩⟩
121-
refine ⟨l, by simp [← String.bytes_inj]⟩
121+
refine ⟨l, by simp [← String.toByteArray_inj]⟩
122122

123123
@[deprecated String.exists_eq_ofList (since := "2025-10-30")]
124124
theorem String.exists_eq_asString (s : String) :
@@ -134,10 +134,10 @@ theorem String.utf8ByteSize_append {s t : String} :
134134
simp [utf8ByteSize]
135135

136136
@[simp]
137-
theorem String.size_bytes {s : String} : s.bytes.size = s.utf8ByteSize := rfl
137+
theorem String.size_toByteArray {s : String} : s.toByteArray.size = s.utf8ByteSize := rfl
138138

139139
@[simp]
140-
theorem String.bytes_push {s : String} {c : Char} : (s.push c).bytes = s.bytes ++ [c].utf8Encode := by
140+
theorem String.toByteArray_push {s : String} {c : Char} : (s.push c).toByteArray = s.toByteArray ++ [c].utf8Encode := by
141141
simp [push]
142142

143143
namespace String
@@ -160,32 +160,32 @@ theorem utf8ByteSize_ofByteArray {b : ByteArray} {h} :
160160
(String.ofByteArray b h).utf8ByteSize = b.size := rfl
161161

162162
@[simp]
163-
theorem bytes_singleton {c : Char} : (String.singleton c).bytes = [c].utf8Encode := by
163+
theorem toByteArray_singleton {c : Char} : (String.singleton c).toByteArray = [c].utf8Encode := by
164164
simp [singleton]
165165

166166
theorem singleton_eq_ofList {c : Char} : String.singleton c = String.ofList [c] := by
167-
simp [← String.bytes_inj]
167+
simp [← String.toByteArray_inj]
168168

169169
@[deprecated singleton_eq_ofList (since := "2025-10-30")]
170170
theorem singleton_eq_asString {c : Char} : String.singleton c = String.ofList [c] :=
171171
singleton_eq_ofList
172172

173173
@[simp]
174174
theorem append_singleton {s : String} {c : Char} : s ++ singleton c = s.push c := by
175-
simp [← bytes_inj]
175+
simp [← toByteArray_inj]
176176

177177
@[simp]
178178
theorem append_left_inj {s₁ s₂ : String} (t : String) :
179179
s₁ ++ t = s₂ ++ t ↔ s₁ = s₂ := by
180-
simp [← bytes_inj]
180+
simp [← toByteArray_inj]
181181

182182
theorem append_assoc {s₁ s₂ s₃ : String} : s₁ ++ s₂ ++ s₃ = s₁ ++ (s₂ ++ s₃) := by
183-
simp [← bytes_inj, ByteArray.append_assoc]
183+
simp [← toByteArray_inj, ByteArray.append_assoc]
184184

185185
@[simp]
186186
theorem utf8ByteSize_eq_zero_iff {s : String} : s.utf8ByteSize = 0 ↔ s = "" := by
187187
refine ⟨fun h => ?_, fun h => h ▸ utf8ByteSize_empty⟩
188-
simpa [← bytes_inj, ← ByteArray.size_eq_zero_iff] using h
188+
simpa [← toByteArray_inj, ← ByteArray.size_eq_zero_iff] using h
189189

190190
theorem rawEndPos_eq_zero_iff {b : String} : b.rawEndPos = 0 ↔ b = "" := by
191191
simp
@@ -296,14 +296,14 @@ Examples:
296296
-/
297297
structure Pos.Raw.IsValid (s : String) (off : String.Pos.Raw) : Prop where private mk ::
298298
le_rawEndPos : off ≤ s.rawEndPos
299-
isValidUTF8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUTF8
299+
isValidUTF8_extract_zero : (s.toByteArray.extract 0 off.byteIdx).IsValidUTF8
300300

301301
theorem Pos.Raw.IsValid.le_utf8ByteSize {s : String} {off : String.Pos.Raw} (h : off.IsValid s) :
302302
off.byteIdx ≤ s.utf8ByteSize := by
303303
simpa [Pos.Raw.le_iff] using h.le_rawEndPos
304304

305305
theorem Pos.Raw.isValid_iff_isValidUTF8_extract_zero {s : String} {p : Pos.Raw} :
306-
p.IsValid s ↔ p ≤ s.rawEndPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUTF8 :=
306+
p.IsValid s ↔ p ≤ s.rawEndPos ∧ (s.toByteArray.extract 0 p.byteIdx).IsValidUTF8 :=
307307
fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩, fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩⟩
308308

309309
@[deprecated le_rawEndPos (since := "2025-10-20")]
@@ -319,7 +319,7 @@ theorem Pos.Raw.isValid_zero {s : String} : (0 : Pos.Raw).IsValid s where
319319
@[simp]
320320
theorem Pos.Raw.isValid_rawEndPos {s : String} : s.rawEndPos.IsValid s where
321321
le_rawEndPos := by simp
322-
isValidUTF8_extract_zero := by simp [← size_bytes, s.isValidUTF8]
322+
isValidUTF8_extract_zero := by simp [← size_toByteArray, s.isValidUTF8]
323323

324324
theorem Pos.Raw.isValid_of_eq_rawEndPos {s : String} {p : Pos.Raw} (h : p = s.rawEndPos) :
325325
p.IsValid s := by
@@ -650,4 +650,8 @@ abbrev startValidPos (s : String) : s.Pos :=
650650
abbrev endValidPos (s : String) : s.Pos :=
651651
s.endPos
652652

653+
@[deprecated String.toByteArray (since := "2025-11-24")]
654+
abbrev String.bytes (s : String) : ByteArray :=
655+
s.toByteArray
656+
653657
end String

src/Init/Data/String/Lemmas/Splits.lean

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,29 +82,29 @@ theorem Pos.Splits.toSlice {s : String} {p : s.Pos} {t₁ t₂ : String}
8282

8383
theorem Pos.splits {s : String} (p : s.Pos) :
8484
p.Splits (s.sliceTo p).copy (s.sliceFrom p).copy where
85-
eq_append := by simp [← bytes_inj, Slice.bytes_copy, ← size_bytes]
85+
eq_append := by simp [← toByteArray_inj, Slice.toByteArray_copy, ← size_toByteArray]
8686
offset_eq_rawEndPos := by simp
8787

8888
theorem Slice.Pos.splits {s : Slice} (p : s.Pos) :
8989
p.Splits (s.sliceTo p).copy (s.sliceFrom p).copy where
9090
eq_append := copy_eq_copy_sliceTo
9191
offset_eq_rawEndPos := by simp
9292

93-
theorem Pos.Splits.bytes_left_eq {s : String} {p : s.Pos} {t₁ t₂}
94-
(h : p.Splits t₁ t₂) : t₁.bytes = s.bytes.extract 0 p.offset.byteIdx := by
93+
theorem Pos.Splits.toByteArray_left_eq {s : String} {p : s.Pos} {t₁ t₂}
94+
(h : p.Splits t₁ t₂) : t₁.toByteArray = s.toByteArray.extract 0 p.offset.byteIdx := by
9595
simp [h.eq_append, h.offset_eq_rawEndPos, ByteArray.extract_append_eq_left]
9696

97-
theorem Pos.Splits.bytes_right_eq {s : String} {p : s.Pos} {t₁ t₂}
98-
(h : p.Splits t₁ t₂) : t₂.bytes = s.bytes.extract p.offset.byteIdx s.utf8ByteSize := by
97+
theorem Pos.Splits.toByteArray_right_eq {s : String} {p : s.Pos} {t₁ t₂}
98+
(h : p.Splits t₁ t₂) : t₂.toByteArray = s.toByteArray.extract p.offset.byteIdx s.utf8ByteSize := by
9999
simp [h.eq_append, h.offset_eq_rawEndPos, ByteArray.extract_append_eq_right]
100100

101101
theorem Pos.Splits.eq_left {s : String} {p : s.Pos} {t₁ t₂ t₃ t₄}
102102
(h₁ : p.Splits t₁ t₂) (h₂ : p.Splits t₃ t₄) : t₁ = t₃ := by
103-
rw [← String.bytes_inj, h₁.bytes_left_eq, h₂.bytes_left_eq]
103+
rw [← String.toByteArray_inj, h₁.toByteArray_left_eq, h₂.toByteArray_left_eq]
104104

105105
theorem Pos.Splits.eq_right {s : String} {p : s.Pos} {t₁ t₂ t₃ t₄}
106106
(h₁ : p.Splits t₁ t₂) (h₂ : p.Splits t₃ t₄) : t₂ = t₄ := by
107-
rw [← String.bytes_inj, h₁.bytes_right_eq, h₂.bytes_right_eq]
107+
rw [← String.toByteArray_inj, h₁.toByteArray_right_eq, h₂.toByteArray_right_eq]
108108

109109
theorem Pos.Splits.eq {s : String} {p : s.Pos} {t₁ t₂ t₃ t₄}
110110
(h₁ : p.Splits t₁ t₂) (h₂ : p.Splits t₃ t₄) : t₁ = t₃ ∧ t₂ = t₄ :=

src/Init/Data/String/Modify.lean

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ Examples:
3535
@[extern "lean_string_utf8_set", expose]
3636
def Pos.set {s : String} (p : s.Pos) (c : Char) (hp : p ≠ s.endPos) : String :=
3737
if hc : c.utf8Size = 1 ∧ (p.byte hp).utf8ByteSize isUTF8FirstByte_byte = 1 then
38-
.ofByteArray (s.bytes.set p.offset.byteIdx c.toUInt8 (p.byteIdx_lt_utf8ByteSize hp)) (by
38+
.ofByteArray (s.toByteArray.set p.offset.byteIdx c.toUInt8 (p.byteIdx_lt_utf8ByteSize hp)) (by
3939
rw [ByteArray.set_eq_push_extract_append_extract, ← hc.2, utf8ByteSize_byte,
4040
← Pos.byteIdx_offset_next]
4141
refine ByteArray.IsValidUTF8.append ?_ (p.next hp).isValid.isValidUTF8_extract_utf8ByteSize
@@ -48,7 +48,7 @@ theorem Pos.set_eq_append {s : String} {p : s.Pos} {c : Char} {hp} :
4848
rw [set]
4949
split
5050
· rename_i h
51-
simp [← bytes_inj, ByteArray.set_eq_push_extract_append_extract, Slice.bytes_copy,
51+
simp [← toByteArray_inj, ByteArray.set_eq_push_extract_append_extract, Slice.toByteArray_copy,
5252
List.utf8Encode_singleton, String.utf8EncodeChar_eq_singleton h.1, utf8ByteSize_byte ▸ h.2]
5353
· rfl
5454

src/Init/Data/String/PosRaw.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ At runtime, this function is implemented by efficient, constant-time code.
108108
-/
109109
@[extern "lean_string_get_byte_fast", expose]
110110
def getUTF8Byte (s : @& String) (p : Pos.Raw) (h : p < s.rawEndPos) : UInt8 :=
111-
s.bytes[p.byteIdx]
111+
s.toByteArray[p.byteIdx]
112112

113113
@[deprecated getUTF8Byte (since := "2025-10-01"), extern "lean_string_get_byte_fast", expose]
114114
abbrev getUtf8Byte (s : String) (p : Pos.Raw) (h : p < s.rawEndPos) : UInt8 :=

src/Init/Prelude.lean

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3449,11 +3449,11 @@ structure String where ofByteArray ::
34493449
/-- The bytes of the UTF-8 encoding of the string. Since strings have a special representation in
34503450
the runtime, this function actually takes linear time and space at runtime. For efficient access
34513451
to the string's bytes, use `String.utf8ByteSize` and `String.getUTF8Byte`. -/
3452-
bytes : ByteArray
3452+
toByteArray : ByteArray
34533453
/-- The bytes of the string form valid UTF-8. -/
3454-
isValidUTF8 : ByteArray.IsValidUTF8 bytes
3454+
isValidUTF8 : ByteArray.IsValidUTF8 toByteArray
34553455

3456-
attribute [extern "lean_string_to_utf8"] String.bytes
3456+
attribute [extern "lean_string_to_utf8"] String.toByteArray
34573457
attribute [extern "lean_string_from_utf8_unchecked"] String.ofByteArray
34583458

34593459
/--
@@ -3468,7 +3468,7 @@ def String.decEq (s₁ s₂ : @& String) : Decidable (Eq s₁ s₂) :=
34683468
| ⟨⟨⟨s₁⟩⟩, _⟩, ⟨⟨⟨s₂⟩⟩, _⟩ =>
34693469
dite (Eq s₁ s₂) (fun h => match s₁, s₂, h with | _, _, Eq.refl _ => isTrue rfl)
34703470
(fun h => isFalse
3471-
(fun h' => h (congrArg (fun s => Array.toList (ByteArray.data (String.bytes s))) h')))
3471+
(fun h' => h (congrArg (fun s => Array.toList (ByteArray.data (String.toByteArray s))) h')))
34723472

34733473
instance : DecidableEq String := String.decEq
34743474

@@ -3534,7 +3534,7 @@ At runtime, this function takes constant time because the byte length of strings
35343534
-/
35353535
@[extern "lean_string_utf8_byte_size"]
35363536
def String.utf8ByteSize (s : @& String) : Nat :=
3537-
s.bytes.size
3537+
s.toByteArray.size
35383538

35393539
/--
35403540
A UTF-8 byte position that points at the end of a string, just after the last character.

tests/lean/run/issue11186.lean

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
error: Failed to compile pattern matching: Stuck at
66
remaining variables: [x✝:(String)]
77
alternatives:
8-
[bytes✝:(ByteArray),
9-
isValidUTF8✝:(bytes✝.IsValidUTF8)] |- [(String.ofByteArray bytes✝ isValidUTF8✝)] => h_1 bytes✝ isValidUTF8✝
8+
[toByteArray✝:(ByteArray),
9+
isValidUTF8✝:(toByteArray✝.IsValidUTF8)] |- [(String.ofByteArray toByteArray✝ isValidUTF8✝)] => h_1 toByteArray✝
10+
isValidUTF8✝
1011
[] |- ["Eek"] => h_2 ()
1112
[x✝:(String)] |- [x✝] => h_3 x✝
1213
examples:_

tests/lean/setLit.lean.expected.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ setLit.lean:22:19-22:21: error: overloaded, errors
44

55
Hint: Type class instance resolution failures can be inspected with the `set_option trace.Meta.synthInstance true` command.
66

7-
Fields missing: `bytes`, `isValidUTF8`
7+
Fields missing: `toByteArray`, `isValidUTF8`
88

99
Hint: Add missing fields:
10-
̲b̲y̲t̲e̲s̲ ̲:̲=̲ ̲_̲
10+
̲t̲o̲B̲y̲t̲e̲A̲r̲r̲a̲y̲ ̲:̲=̲ ̲_̲
1111
̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲i̲s̲V̲a̲l̲i̲d̲U̲T̲F̲8̲ ̲:̲=̲ ̲_̲ ̲
1212
setLit.lean:24:31-24:38: error: overloaded, errors
1313
failed to synthesize instance of type class

0 commit comments

Comments
 (0)