refactor: rename String.bytes to String.toByteArray (#11343)

TwoFX · web-flow · commit 151c034f4fb2 · 2025-11-24T18:59:49.000Z
This PR renames `String.bytes` to `String.toByteArray`.

This is for two reasons: first, `toByteArray` is a better name, and
second, we have something else that wants to use the name `bytes`,
namely the function that returns an iterator over the string's bytes.
diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean
diff --git a/src/Init/Data/String/Defs.lean b/src/Init/Data/String/Defs.lean
@@ -74,11 +74,11 @@ Encodes a string in UTF-8 as an array of bytes.
 -/
 @[extern "lean_string_to_utf8"]
 def String.toUTF8 (a : @& String) : ByteArray :=
-  a.bytes
+  a.toByteArray
 
-@[simp] theorem String.toUTF8_eq_bytes {s : String} : s.toUTF8 = s.bytes := (rfl)
+@[simp] theorem String.toUTF8_eq_toByteArray {s : String} : s.toUTF8 = s.toByteArray := (rfl)
 
-@[simp] theorem String.bytes_empty : "".bytes = ByteArray.empty := (rfl)
+@[simp] theorem String.toByteArray_empty : "".toByteArray = ByteArray.empty := (rfl)
 
 /--
 Appends two strings. Usually accessed via the `++` operator.
@@ -92,33 +92,33 @@ Examples:
 -/
 @[extern "lean_string_append", expose]
 def String.append (s : String) (t : @& String) : String where
-  bytes := s.bytes ++ t.bytes
+  toByteArray := s.toByteArray ++ t.toByteArray
   isValidUTF8 := s.isValidUTF8.append t.isValidUTF8
 
 instance : Append String where
   append s t := s.append t
 
 @[simp]
-theorem String.bytes_append {s t : String} : (s ++ t).bytes = s.bytes ++ t.bytes := (rfl)
+theorem String.toByteArray_append {s t : String} : (s ++ t).toByteArray = s.toByteArray ++ t.toByteArray := (rfl)
 
-theorem String.bytes_inj {s t : String} : s.bytes = t.bytes ↔ s = t := by
+theorem String.toByteArray_inj {s t : String} : s.toByteArray = t.toByteArray ↔ s = t := by
   refine ⟨fun h => ?_, (· ▸ rfl)⟩
   rcases s with ⟨s⟩
   rcases t with ⟨t⟩
   subst h
   rfl
 
-@[simp] theorem String.bytes_ofList {l : List Char} : (String.ofList l).bytes = l.utf8Encode := by
+@[simp] theorem String.toByteArray_ofList {l : List Char} : (String.ofList l).toByteArray = l.utf8Encode := by
   simp [String.ofList]
 
-@[deprecated String.bytes_ofList (since := "2025-10-30")]
-theorem List.bytes_asString {l : List Char} : (String.ofList l).bytes = l.utf8Encode :=
-  String.bytes_ofList
+@[deprecated String.toByteArray_ofList (since := "2025-10-30")]
+theorem List.toByteArray_asString {l : List Char} : (String.ofList l).toByteArray = l.utf8Encode :=
+  String.toByteArray_ofList
 
 theorem String.exists_eq_ofList (s : String) :
     ∃ l : List Char, s = String.ofList l := by
   rcases s with ⟨_, ⟨l, rfl⟩⟩
-  refine ⟨l, by simp [← String.bytes_inj]⟩
+  refine ⟨l, by simp [← String.toByteArray_inj]⟩
 
 @[deprecated String.exists_eq_ofList (since := "2025-10-30")]
 theorem String.exists_eq_asString (s : String) :
@@ -134,10 +134,10 @@ theorem String.utf8ByteSize_append {s t : String} :
   simp [utf8ByteSize]
 
 @[simp]
-theorem String.size_bytes {s : String} : s.bytes.size = s.utf8ByteSize := rfl
+theorem String.size_toByteArray {s : String} : s.toByteArray.size = s.utf8ByteSize := rfl
 
 @[simp]
-theorem String.bytes_push {s : String} {c : Char} : (s.push c).bytes = s.bytes ++ [c].utf8Encode := by
+theorem String.toByteArray_push {s : String} {c : Char} : (s.push c).toByteArray = s.toByteArray ++ [c].utf8Encode := by
   simp [push]
 
 namespace String
@@ -160,32 +160,32 @@ theorem utf8ByteSize_ofByteArray {b : ByteArray} {h} :
     (String.ofByteArray b h).utf8ByteSize = b.size := rfl
 
 @[simp]
-theorem bytes_singleton {c : Char} : (String.singleton c).bytes = [c].utf8Encode := by
+theorem toByteArray_singleton {c : Char} : (String.singleton c).toByteArray = [c].utf8Encode := by
   simp [singleton]
 
 theorem singleton_eq_ofList {c : Char} : String.singleton c = String.ofList [c] := by
-  simp [← String.bytes_inj]
+  simp [← String.toByteArray_inj]
 
 @[deprecated singleton_eq_ofList (since := "2025-10-30")]
 theorem singleton_eq_asString {c : Char} : String.singleton c = String.ofList [c] :=
   singleton_eq_ofList
 
 @[simp]
 theorem append_singleton {s : String} {c : Char} : s ++ singleton c = s.push c := by
-  simp [← bytes_inj]
+  simp [← toByteArray_inj]
 
 @[simp]
 theorem append_left_inj {s₁ s₂ : String} (t : String) :
     s₁ ++ t = s₂ ++ t ↔ s₁ = s₂ := by
-  simp [← bytes_inj]
+  simp [← toByteArray_inj]
 
 theorem append_assoc {s₁ s₂ s₃ : String} : s₁ ++ s₂ ++ s₃ = s₁ ++ (s₂ ++ s₃) := by
-  simp [← bytes_inj, ByteArray.append_assoc]
+  simp [← toByteArray_inj, ByteArray.append_assoc]
 
 @[simp]
 theorem utf8ByteSize_eq_zero_iff {s : String} : s.utf8ByteSize = 0 ↔ s = "" := by
   refine ⟨fun h => ?_, fun h => h ▸ utf8ByteSize_empty⟩
-  simpa [← bytes_inj, ← ByteArray.size_eq_zero_iff] using h
+  simpa [← toByteArray_inj, ← ByteArray.size_eq_zero_iff] using h
 
 theorem rawEndPos_eq_zero_iff {b : String} : b.rawEndPos = 0 ↔ b = "" := by
   simp
@@ -296,14 +296,14 @@ Examples:
 -/
 structure Pos.Raw.IsValid (s : String) (off : String.Pos.Raw) : Prop where private mk ::
   le_rawEndPos : off ≤ s.rawEndPos
-  isValidUTF8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUTF8
+  isValidUTF8_extract_zero : (s.toByteArray.extract 0 off.byteIdx).IsValidUTF8
 
 theorem Pos.Raw.IsValid.le_utf8ByteSize {s : String} {off : String.Pos.Raw} (h : off.IsValid s) :
     off.byteIdx ≤ s.utf8ByteSize := by
   simpa [Pos.Raw.le_iff] using h.le_rawEndPos
 
 theorem Pos.Raw.isValid_iff_isValidUTF8_extract_zero {s : String} {p : Pos.Raw} :
-    p.IsValid s ↔ p ≤ s.rawEndPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUTF8 :=
+    p.IsValid s ↔ p ≤ s.rawEndPos ∧ (s.toByteArray.extract 0 p.byteIdx).IsValidUTF8 :=
   ⟨fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩, fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩⟩
 
 @[deprecated le_rawEndPos (since := "2025-10-20")]
@@ -319,7 +319,7 @@ theorem Pos.Raw.isValid_zero {s : String} : (0 : Pos.Raw).IsValid s where
 @[simp]
 theorem Pos.Raw.isValid_rawEndPos {s : String} : s.rawEndPos.IsValid s where
   le_rawEndPos := by simp
-  isValidUTF8_extract_zero := by simp [← size_bytes, s.isValidUTF8]
+  isValidUTF8_extract_zero := by simp [← size_toByteArray, s.isValidUTF8]
 
 theorem Pos.Raw.isValid_of_eq_rawEndPos {s : String} {p : Pos.Raw} (h : p = s.rawEndPos) :
     p.IsValid s := by
@@ -650,4 +650,8 @@ abbrev startValidPos (s : String) : s.Pos :=
 abbrev endValidPos (s : String) : s.Pos :=
   s.endPos
 
+@[deprecated String.toByteArray (since := "2025-11-24")]
+abbrev bytes (s : String) : ByteArray := -- inside `namespace String`: declares `String.bytes`
+  s.toByteArray
+
 end String
diff --git a/src/Init/Data/String/Lemmas/Splits.lean b/src/Init/Data/String/Lemmas/Splits.lean
@@ -82,29 +82,29 @@ theorem Pos.Splits.toSlice {s : String} {p : s.Pos} {t₁ t₂ : String}
 
 theorem Pos.splits {s : String} (p : s.Pos) :
     p.Splits (s.sliceTo p).copy (s.sliceFrom p).copy where
-  eq_append := by simp [← bytes_inj, Slice.bytes_copy, ← size_bytes]
+  eq_append := by simp [← toByteArray_inj, Slice.toByteArray_copy, ← size_toByteArray]
   offset_eq_rawEndPos := by simp
 
 theorem Slice.Pos.splits {s : Slice} (p : s.Pos) :
     p.Splits (s.sliceTo p).copy (s.sliceFrom p).copy where
   eq_append := copy_eq_copy_sliceTo
   offset_eq_rawEndPos := by simp
 
-theorem Pos.Splits.bytes_left_eq {s : String} {p : s.Pos} {t₁ t₂}
-    (h : p.Splits t₁ t₂) : t₁.bytes = s.bytes.extract 0 p.offset.byteIdx := by
+theorem Pos.Splits.toByteArray_left_eq {s : String} {p : s.Pos} {t₁ t₂}
+    (h : p.Splits t₁ t₂) : t₁.toByteArray = s.toByteArray.extract 0 p.offset.byteIdx := by
   simp [h.eq_append, h.offset_eq_rawEndPos, ByteArray.extract_append_eq_left]
 
-theorem Pos.Splits.bytes_right_eq {s : String} {p : s.Pos} {t₁ t₂}
-    (h : p.Splits t₁ t₂) : t₂.bytes = s.bytes.extract p.offset.byteIdx s.utf8ByteSize := by
+theorem Pos.Splits.toByteArray_right_eq {s : String} {p : s.Pos} {t₁ t₂}
+    (h : p.Splits t₁ t₂) : t₂.toByteArray = s.toByteArray.extract p.offset.byteIdx s.utf8ByteSize := by
   simp [h.eq_append, h.offset_eq_rawEndPos, ByteArray.extract_append_eq_right]
 
 theorem Pos.Splits.eq_left {s : String} {p : s.Pos} {t₁ t₂ t₃ t₄}
     (h₁ : p.Splits t₁ t₂) (h₂ : p.Splits t₃ t₄) : t₁ = t₃ := by
-  rw [← String.bytes_inj, h₁.bytes_left_eq, h₂.bytes_left_eq]
+  rw [← String.toByteArray_inj, h₁.toByteArray_left_eq, h₂.toByteArray_left_eq]
 
 theorem Pos.Splits.eq_right {s : String} {p : s.Pos} {t₁ t₂ t₃ t₄}
     (h₁ : p.Splits t₁ t₂) (h₂ : p.Splits t₃ t₄) : t₂ = t₄ := by
-  rw [← String.bytes_inj, h₁.bytes_right_eq, h₂.bytes_right_eq]
+  rw [← String.toByteArray_inj, h₁.toByteArray_right_eq, h₂.toByteArray_right_eq]
 
 theorem Pos.Splits.eq {s : String} {p : s.Pos} {t₁ t₂ t₃ t₄}
     (h₁ : p.Splits t₁ t₂) (h₂ : p.Splits t₃ t₄) : t₁ = t₃ ∧ t₂ = t₄ :=
diff --git a/src/Init/Data/String/Modify.lean b/src/Init/Data/String/Modify.lean
@@ -35,7 +35,7 @@ Examples:
 @[extern "lean_string_utf8_set", expose]
 def Pos.set {s : String} (p : s.Pos) (c : Char) (hp : p ≠ s.endPos) : String :=
   if hc : c.utf8Size = 1 ∧ (p.byte hp).utf8ByteSize isUTF8FirstByte_byte = 1 then
-    .ofByteArray (s.bytes.set p.offset.byteIdx c.toUInt8 (p.byteIdx_lt_utf8ByteSize hp)) (by
+    .ofByteArray (s.toByteArray.set p.offset.byteIdx c.toUInt8 (p.byteIdx_lt_utf8ByteSize hp)) (by
       rw [ByteArray.set_eq_push_extract_append_extract, ← hc.2, utf8ByteSize_byte,
         ← Pos.byteIdx_offset_next]
       refine ByteArray.IsValidUTF8.append ?_ (p.next hp).isValid.isValidUTF8_extract_utf8ByteSize
@@ -48,7 +48,7 @@ theorem Pos.set_eq_append {s : String} {p : s.Pos} {c : Char} {hp} :
   rw [set]
   split
   · rename_i h
-    simp [← bytes_inj, ByteArray.set_eq_push_extract_append_extract, Slice.bytes_copy,
+    simp [← toByteArray_inj, ByteArray.set_eq_push_extract_append_extract, Slice.toByteArray_copy,
       List.utf8Encode_singleton, String.utf8EncodeChar_eq_singleton h.1, utf8ByteSize_byte ▸ h.2]
   · rfl
 
diff --git a/src/Init/Data/String/PosRaw.lean b/src/Init/Data/String/PosRaw.lean
@@ -108,7 +108,7 @@ At runtime, this function is implemented by efficient, constant-time code.
 -/
 @[extern "lean_string_get_byte_fast", expose]
 def getUTF8Byte (s : @& String) (p : Pos.Raw) (h : p < s.rawEndPos) : UInt8 :=
-  s.bytes[p.byteIdx]
+  s.toByteArray[p.byteIdx]
 
 @[deprecated getUTF8Byte (since := "2025-10-01"), extern "lean_string_get_byte_fast", expose]
 abbrev getUtf8Byte (s : String) (p : Pos.Raw) (h : p < s.rawEndPos) : UInt8 :=
diff --git a/src/Init/Prelude.lean b/src/Init/Prelude.lean
@@ -3449,11 +3449,11 @@ structure String where ofByteArray ::
   /-- The bytes of the UTF-8 encoding of the string. Since strings have a special representation in
   the runtime, this function actually takes linear time and space at runtime. For efficient access
   to the string's bytes, use `String.utf8ByteSize` and `String.getUTF8Byte`. -/
-  bytes : ByteArray
+  toByteArray : ByteArray
   /-- The bytes of the string form valid UTF-8. -/
-  isValidUTF8 : ByteArray.IsValidUTF8 bytes
+  isValidUTF8 : ByteArray.IsValidUTF8 toByteArray
 
-attribute [extern "lean_string_to_utf8"] String.bytes
+attribute [extern "lean_string_to_utf8"] String.toByteArray
 attribute [extern "lean_string_from_utf8_unchecked"] String.ofByteArray
 
 /--
@@ -3468,7 +3468,7 @@ def String.decEq (s₁ s₂ : @& String) : Decidable (Eq s₁ s₂) :=
   | ⟨⟨⟨s₁⟩⟩, _⟩, ⟨⟨⟨s₂⟩⟩, _⟩ =>
     dite (Eq s₁ s₂) (fun h => match s₁, s₂, h with | _, _, Eq.refl _ => isTrue rfl)
       (fun h => isFalse
-        (fun h' => h (congrArg (fun s => Array.toList (ByteArray.data (String.bytes s))) h')))
+        (fun h' => h (congrArg (fun s => Array.toList (ByteArray.data (String.toByteArray s))) h')))
 
 instance : DecidableEq String := String.decEq
 
@@ -3534,7 +3534,7 @@ At runtime, this function takes constant time because the byte length of strings
 -/
 @[extern "lean_string_utf8_byte_size"]
 def String.utf8ByteSize (s : @& String) : Nat :=
-  s.bytes.size
+  s.toByteArray.size
 
 /--
 A UTF-8 byte position that points at the end of a string, just after the last character.
diff --git a/tests/lean/run/issue11186.lean b/tests/lean/run/issue11186.lean
@@ -5,8 +5,9 @@
 error: Failed to compile pattern matching: Stuck at
   remaining variables: [x✝:(String)]
   alternatives:
-    [bytes✝:(ByteArray),
-     isValidUTF8✝:(bytes✝.IsValidUTF8)] |- [(String.ofByteArray bytes✝ isValidUTF8✝)] => h_1 bytes✝ isValidUTF8✝
+    [toByteArray✝:(ByteArray),
+     isValidUTF8✝:(toByteArray✝.IsValidUTF8)] |- [(String.ofByteArray toByteArray✝ isValidUTF8✝)] => h_1 toByteArray✝
+      isValidUTF8✝
     [] |- ["Eek"] => h_2 ()
     [x✝:(String)] |- [x✝] => h_3 x✝
   examples:_
diff --git a/tests/lean/setLit.lean.expected.out b/tests/lean/setLit.lean.expected.out
@@ -4,10 +4,10 @@ setLit.lean:22:19-22:21: error: overloaded, errors
   
   Hint: Type class instance resolution failures can be inspected with the `set_option trace.Meta.synthInstance true` command.
   
-  Fields missing: `bytes`, `isValidUTF8`
+  Fields missing: `toByteArray`, `isValidUTF8`
   
   Hint: Add missing fields:
-     ̲b̲y̲t̲e̲s̲ ̲:̲=̲ ̲_̲
+     ̲t̲o̲B̲y̲t̲e̲A̲r̲r̲a̲y̲ ̲:̲=̲ ̲_̲
     ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲i̲s̲V̲a̲l̲i̲d̲U̲T̲F̲8̲ ̲:̲=̲ ̲_̲ ̲
 setLit.lean:24:31-24:38: error: overloaded, errors 
   failed to synthesize instance of type class