@@ -64,103 +64,73 @@ not a sequence of Unicode scalar values.
6464-/
-- Diff summary for `ByteArray.utf8Decode?`: the helper `go` loses its `fuel` argument.
-- Lines marked `-` are the old fuel-indexed helper (structural recursion on `fuel`);
-- lines marked `+` are the new helper, which recurses on the byte offset `i` directly.
6565@[inline, expose]
6666def ByteArray.utf8Decode? (b : ByteArray) : Option (Array Char) :=
-- Old entry point: started with fuel `b.size + 1` and needed two side proofs.
67- go (b.size + 1 ) 0 #[] ( by simp) (by simp)
-- New entry point: starts at offset 0 with an empty accumulator; only `0 ≤ b.size` is proved.
67+ go 0 #[] (by simp)
6868where
-- Old helper: `hf : b.size - i < fuel` had to be re-established on every recursive call.
69- go (fuel : Nat) (i : Nat) (acc : Array Char) (hi : i ≤ b.size) (hf : b.size - i < fuel) : Option (Array Char) :=
70- match fuel, hf with
71- | fuel + 1 , _ =>
72- if i = b.size then
73- some acc
74- else
75- match h : utf8DecodeChar? b i with
76- | none => none
77- | some c => go fuel (i + c.utf8Size) (acc.push c)
78- (le_size_of_utf8DecodeChar?_eq_some h)
79- (have := c.utf8Size_pos; have := le_size_of_utf8DecodeChar?_eq_some h; by omega)
80- termination_by structural fuel
-- New helper. While `i < b.size` it decodes one character at offset `i` with
-- `utf8DecodeChar?`: a failed decode returns `none`; a successful decode pushes `c`
-- onto `acc` and advances `i` by `c.utf8Size`. Otherwise it returns `some acc`.
-- Note the guard is now `i < b.size` (was `i = b.size`), so the two branches swap order.
69+ @[semireducible]
70+ go (i : Nat) (acc : Array Char) (hi : i ≤ b.size) : Option (Array Char) :=
71+ if i < b.size then
72+ match h : utf8DecodeChar? b i with
73+ | none => none
-- `le_size_of_utf8DecodeChar?_eq_some h` supplies the bound `hi` for the next offset.
74+ | some c => go (i + c.utf8Size) (acc.push c) (le_size_of_utf8DecodeChar?_eq_some h)
75+ else
76+ some acc
-- Termination measure is the number of bytes left; it decreases because
-- `c.utf8Size_pos` gives a positive step and `omega` finishes the arithmetic.
77+ termination_by b.size - i
78+ decreasing_by have := c.utf8Size_pos; omega
8179
-- Diff summary for `ByteArray.validateUTF8`: the helper `go` drops its `fuel` argument
-- (lines marked `-`) and instead recurses on the offset `i` with termination measure
-- `b.size - i` (lines marked `+`).
-- The declaration carries `extern "lean_string_validate_utf8"`.
-- NOTE(review): presumably compiled code uses that native symbol and this body serves as
-- the logical model — confirm.
8280@[expose, extern "lean_string_validate_utf8"]
8381def ByteArray.validateUTF8 (b : @& ByteArray) : Bool :=
-- Old entry point: fuel `b.size + 1` plus two side proofs.
84- go (b.size + 1 ) 0 ( by simp) (by simp)
-- New entry point: offset 0 and a single proof of `0 ≤ b.size`.
82+ go 0 (by simp)
8583where
-- Old helper: two deferred obligations (`?_ ?_`), the bound on the next offset and the
-- fuel inequality, both discharged in the `finally` section below.
86- go (fuel : Nat) (i : Nat) (hi : i ≤ b.size) (hf : b.size - i < fuel) : Bool :=
87- match fuel, hf with
88- | fuel + 1 , _ =>
89- if hi : i = b.size then
90- true
91- else
92- match h : validateUTF8At b i with
93- | false => false
94- | true => go fuel (i + b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h))
95- ?_ ?_
96- termination_by structural fuel
-- New helper. While `i < b.size` it checks the position with `validateUTF8At b i`:
-- `false` stops with `false`; `true` continues at `i + b[i].utf8ByteSize _`.
-- Otherwise it returns `true`.
84+ @[semireducible]
85+ go (i : Nat) (hi : i ≤ b.size) : Bool :=
-- The `if hi : …` binder shadows the parameter `hi` with the strict bound `i < b.size`
-- (presumably what makes the index expression `b[i]` below typecheck).
86+ if hi : i < b.size then
87+ match h : validateUTF8At b i with
88+ | false => false
-- Only one obligation (`?_`, the bound on the next offset) is deferred to `finally` now.
89+ | true => go (i + b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h)) ?_
90+ else
91+ true
-- Termination: the remaining length shrinks because `utf8ByteSize_pos` gives a positive step.
92+ termination_by b.size - i
93+ decreasing_by
94+ have := b[i].utf8ByteSize_pos (isUTF8FirstByte_of_validateUTF8At h); omega
-- Deferred goals: `h` is first rewritten via `validateUTF8At_eq_isSome_utf8DecodeChar?`;
-- the surviving bullet proves the next offset stays within `b.size`.
9795finally
9896 all_goals rw [ByteArray.validateUTF8At_eq_isSome_utf8DecodeChar?] at h
9997 · rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)]
10098 exact add_utf8Size_utf8DecodeChar_le_size
-- Removed bullet: it discharged the old fuel inequality, which no longer exists.
101- · rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)]
102- have := add_utf8Size_utf8DecodeChar_le_size (h := h)
103- have := (b.utf8DecodeChar i h).utf8Size_pos
104- omega
10599
-- Helper-level statement: `utf8Decode?.go` returns `some _` exactly when
-- `validateUTF8.go` returns `true`, for the same byte array, offset and bound proof.
-- This diff removes the `fuel`/`hf` arguments from the statement and renumbers the
-- `fun_induction` cases to follow the new branch order of `utf8Decode?.go`
-- (decode fails, decode succeeds, end of input).
106- theorem ByteArray.isSome_utf8Decode?Go_eq_validateUTF8Go {b : ByteArray} {fuel : Nat}
107- {i : Nat} {acc : Array Char} {hi : i ≤ b.size} {hf : b.size - i < fuel} :
108- (utf8Decode?.go b fuel i acc hi hf ).isSome = validateUTF8.go b fuel i hi hf := by
100+ theorem ByteArray.isSome_utf8Decode?Go_eq_validateUTF8Go {b : ByteArray}
101+ {i : Nat} {acc : Array Char} {hi : i ≤ b.size} :
102+ (utf8Decode?.go b i acc hi).isSome = validateUTF8.go b i hi := by
109103 fun_induction utf8Decode?.go with
-- Old case1 was the end-of-input branch; it is now the last case (case3) below.
110- | case1 => simp [validateUTF8.go]
111- | case2 i acc hi fuel hf h₁ h₂ =>
112- simp only [Option.isSome_none, validateUTF8.go, h₁, ↓reduceDIte, Bool.false_eq]
-- New case1: the left-hand side is `none` (see `Option.isSome_none`), so
-- `validateUTF8.go` must be shown to return `false`. It is unfolded explicitly with
-- `unfold` instead of being passed to `simp only`.
104+ | case1 i acc hi h₁ h₂ =>
105+ unfold validateUTF8.go
106+ simp only [Option.isSome_none, ↓reduceDIte, Bool.false_eq, h₁ ]
113107 split
114108 · rfl
115109 · rename_i heq
116110 simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq
117- | case3 i acc hi fuel hf h₁ c h₂ ih =>
118- simp [validateUTF8.go, h₁]
-- New case2: a character `c` was decoded; the induction hypothesis `ih` is applied
-- inside `simp only`, replacing the separate `rw [ih]` of the old proof.
111+ | case2 i acc hi h₁ c h₂ ih =>
112+ unfold validateUTF8.go
113+ simp only [↓reduceDIte, ih, h₁]
119114 split
120115 · rename_i heq
121116 simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq
122- · rw [ih]
123- congr
117+ · congr
124118 rw [← ByteArray.utf8Size_utf8DecodeChar (h := by simp [h₂])]
125119 simp [utf8DecodeChar, h₂]
-- New case3: the remaining (end-of-input) branch, closed by unfolding and `simp [*]`.
120+ | case3 => unfold validateUTF8.go; simp [*]
126121
-- Top-level corollary: `b.utf8Decode?` is `some _` exactly when `b.validateUTF8` is `true`.
-- The proof term is the helper-level lemma `isSome_utf8Decode?Go_eq_validateUTF8Go`.
-- Context lines only: the text is unchanged by this diff.
127122theorem ByteArray.isSome_utf8Decode?_eq_validateUTF8 {b : ByteArray} :
128123 b.utf8Decode?.isSome = b.validateUTF8 :=
129124 b.isSome_utf8Decode?Go_eq_validateUTF8Go
130125
-- Removed by this diff (every line is marked `-`).
-- The lemma stated that `utf8Decode?.go` gives the same result for two possibly different
-- fuel values `fuel` and `fuel'`, provided the byte array, offset and accumulator agree.
-- The one use visible in this diff, in `utf8Decode?_utf8Encode_singleton_append`,
-- is removed as well and replaced by `congr 2`.
131- theorem ByteArray.utf8Decode?.go.congr {b b' : ByteArray} {fuel fuel' i i' : Nat} {acc acc' : Array Char} {hi hi' hf hf'}
132- (hbb' : b = b') (hii' : i = i') (hacc : acc = acc') :
133- ByteArray.utf8Decode?.go b fuel i acc hi hf = ByteArray.utf8Decode?.go b' fuel' i' acc' hi' hf' := by
134- subst hbb' hii' hacc
-- Induction over the left-hand call while generalizing the right-hand fuel `fuel'`.
135- fun_induction ByteArray.utf8Decode?.go b fuel i acc hi hf generalizing fuel' with
136- | case1 =>
137- rw [go.eq_def]
138- split
139- simp
140- | case2 =>
141- rw [go.eq_def]
142- split <;> split
143- · simp_all
144- · split <;> simp_all
145- | case3 =>
146- conv => rhs; rw [go.eq_def]
147- split <;> split
148- · simp_all
149- · split
150- · simp_all
-- Both sides decoded a character; the two decoded characters are identified before `ih` applies.
151- · rename_i c₁ hc₁ ih _ _ _ _ _ c₂ hc₂
152- obtain rfl : c₁ = c₂ := by rw [← Option.some_inj, ← hc₁, ← hc₂]
153- apply ih
154-
-- Simp lemma: decoding the empty byte array yields `some #[]`.
-- Proved by letting `simp` unfold `utf8Decode?` and `utf8Decode?.go`.
-- Context lines only: the text is unchanged by this diff.
155126@[simp]
156127theorem ByteArray.utf8Decode?_empty : ByteArray.empty.utf8Decode? = some #[] := by
157128 simp [utf8Decode?, utf8Decode?.go]
158129
159- private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat} { hi : i ≤ b.size} {hf } {acc : Array Char} :
160- (ByteArray.utf8Decode?.go b fuel i acc hi hf ).isSome ↔ IsValidUTF8 (b.extract i b.size) := by
130+ private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {hi : i ≤ b.size} {acc : Array Char} :
131+ (ByteArray.utf8Decode?.go b i acc hi).isSome ↔ IsValidUTF8 (b.extract i b.size) := by
161132 fun_induction ByteArray.utf8Decode?.go with
162- | case1 => simp
163- | case2 fuel i hi hf acc h₁ h₂ =>
133+ | case1 i hi acc h₁ h₂ =>
164134 simp only [Option.isSome_none, Bool.false_eq_true, false_iff]
165135 rintro ⟨l, hl⟩
166136 have : l ≠ [] := by
@@ -170,7 +140,7 @@ private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat
170140 rw [← l.cons_head_tail this] at hl
171141 rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract, hl, List.utf8DecodeChar?_utf8Encode_cons] at h₂
172142 simp at h₂
173- | case3 i acc hi fuel hf h₁ c h₂ ih =>
143+ | case2 i acc hi h₁ c h₂ ih =>
174144 rw [ih]
175145 have h₂' := h₂
176146 rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂'
@@ -179,6 +149,9 @@ private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat
179149 (le_size_of_utf8DecodeChar?_eq_some h₂)] at hl ⊢
180150 rw [ByteArray.append_inj_left hl (by have := le_size_of_utf8DecodeChar?_eq_some h₂; simp; omega),
181151 ← List.utf8Encode_singleton, isValidUTF8_utf8Encode_singleton_append_iff]
152+ | case3 i =>
153+ have : i = b.size := by omega
154+ simp [*]
182155
183156theorem ByteArray.isSome_utf8Decode?_iff {b : ByteArray} :
184157 b.utf8Decode?.isSome ↔ IsValidUTF8 b := by
@@ -305,27 +278,21 @@ theorem String.length_toList {s : String} : s.toList.length = s.length := (rfl)
-- Deprecated name (since 2025-10-30); the attribute points users to `String.length_toList`.
-- States `b.toList.length = b.length`, proved by `rfl`.
-- Context lines only: the text is unchanged by this diff.
305278@[deprecated String.length_toList (since := "2025-10-30")]
306279theorem String.length_data {b : String} : b.toList.length = b.length := (rfl)
307280
308- private theorem ByteArray.utf8Decode?go_eq_utf8Decode?go_extract {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
309- utf8Decode?.go b fuel i acc hi hf = (utf8Decode?.go (b.extract i b.size) fuel 0 #[] (by simp) (by simp [hf])).map (acc ++ ·) := by
310- fun_cases utf8Decode?.go b fuel i acc hi hf with
311- | case1 =>
312- rw [utf8Decode?.go]
313- simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
314- List.nil_append]
315- rw [if_pos (by omega)]
316- simp
317- | case2 fuel hf₁ h₁ h₂ hf₂ =>
281+ private theorem ByteArray.utf8Decode?go_eq_utf8Decode?go_extract {b : ByteArray} {hi : i ≤ b.size} {acc : Array Char} :
282+ utf8Decode?.go b i acc hi = (utf8Decode?.go (b.extract i b.size) 0 #[] (by simp)).map (acc ++ ·) := by
283+ fun_cases utf8Decode?.go b i acc hi with
284+ | case1 h₁ h₂ =>
318285 rw [utf8Decode?.go]
319286 simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
320287 List.nil_append]
321- rw [if_neg (by omega)]
288+ rw [if_pos (by omega)]
322289 rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂
323290 split <;> simp_all
324- | case3 fuel hf₁ h₁ c h₂ hf ₂ =>
291+ | case2 h₁ c h₂ =>
325292 conv => rhs; rw [utf8Decode?.go]
326293 simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
327294 List.nil_append]
328- rw [if_neg (by omega)]
295+ rw [if_pos (by omega)]
329296 rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂
330297 split
331298 · simp_all
@@ -338,20 +305,25 @@ private theorem ByteArray.utf8Decode?go_eq_utf8Decode?go_extract {b : ByteArray}
338305 simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Option.map_map, ByteArray.extract_extract]
339306 have : (fun x => acc ++ x) ∘ (fun x => #[c] ++ x) = fun x => acc.push c ++ x := by funext; simp
340307 simp [(by omega : i + (b.size - i) = b.size), this]
341- termination_by fuel
308+ | case3 =>
309+ rw [utf8Decode?.go]
310+ simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
311+ List.nil_append]
312+ rw [if_neg (by omega)]
313+ simp
314+ termination_by b.size - i
342315
-- Decoding the encoding of a single character `c` followed by bytes `l` equals decoding `l`
-- and prepending `#[c]` to the result.
-- The proof unfolds one step of `utf8Decode?.go` and then applies
-- `utf8Decode?go_eq_utf8Decode?go_extract`.
343316theorem ByteArray.utf8Decode?_utf8Encode_singleton_append {l : ByteArray} {c : Char} :
344317 ([c].utf8Encode ++ l).utf8Decode? = l.utf8Decode?.map (#[c] ++ ·) := by
345318 rw [utf8Decode?, utf8Decode?.go,
-- The guard of `go` changed from `i = b.size` to `i < b.size`, so the non-empty input now
-- takes the positive branch: `if_neg` becomes `if_pos`, with the same side proof.
346- if_neg (by simp [List.utf8Encode_singleton]; have := c.utf8Size_pos; omega)]
319+ if_pos (by simp [List.utf8Encode_singleton]; have := c.utf8Size_pos; omega)]
347320 split
348321 · simp_all [List.utf8DecodeChar?_utf8Encode_singleton_append]
349322 · rename_i d h
-- The decoded character `d` is identified with `c`.
350323 obtain rfl : c = d := by simpa [List.utf8DecodeChar?_utf8Encode_singleton_append] using h
351324 rw [utf8Decode?go_eq_utf8Decode?go_extract, utf8Decode?]
352325 simp only [List.push_toArray, List.nil_append, Nat.zero_add]
-- `congr 2` replaces `congr 1` followed by the removed fuel-independence lemma
-- `utf8Decode?.go.congr`.
353- congr 1
354- apply ByteArray.utf8Decode?.go.congr _ rfl rfl
326+ congr 2
355327 apply extract_append_eq_right _ (by simp)
356328 simp [List.utf8Encode_singleton]
357329
0 commit comments