Skip to content

Commit f0354f0

Browse files
committed
feat: add word-level hint suggestion diffs
1 parent c6cad5f commit f0354f0

File tree

3 files changed

+182
-13
lines changed

3 files changed

+182
-13
lines changed

src/Lean/Meta/Hint.lean

Lines changed: 109 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -121,27 +121,125 @@ Guarantees that all actions in the output will be maximally grouped; that is, in
121121
`#[(.insert, "a"), (.insert, "b")]`, it will return `#[(.insert, "ab")]`.
122122
-/
123123
partial def readableDiff (s s' : String) : Array (Diff.Action × String) :=
124-
-- TODO: add additional diff granularities
125124
let minLength := min s.length s'.length
126-
-- The coefficient on this value can be tuned:
127-
let maxCharDiffDistance := minLength
125+
-- The coefficient on these values can be tuned:
126+
let maxCharDiffDistance := minLength / 2
127+
let maxWordDiffDistance := minLength + (max s.length s'.length - minLength) / 2
128128

129-
let charDiff := Diff.diff (splitChars s) (splitChars s')
129+
let charDiffRaw := Diff.diff (splitChars s) (splitChars s')
130130
-- Note: this is just a rough heuristic, since the diff has no notion of substitution
131-
let approxEditDistance := charDiff.filter (·.1 != .skip) |>.size
132-
let preStrDiff := joinEdits charDiff
131+
let approxEditDistance := charDiffRaw.filter (·.1 != .skip) |>.size
132+
let charArrDiff := joinEdits charDiffRaw
133+
133134
-- Given that `Diff.diff` returns a minimal diff, any length-≤3 diff can only have edits at the
134135
-- front and back, or at a single interior point. This will always be fairly readable (and
135-
-- splitting by a larger unit would likely only be worse). Otherwise, we should only use the
136-
-- per-character diff if the edit distance isn't so large that it will be hard to read:
137-
if preStrDiff.size ≤ 3 || approxEditDistance ≤ maxCharDiffDistance then
138-
preStrDiff.map fun (act, cs) => (act, String.mk cs.toList)
136+
-- splitting by a larger unit would likely only be worse)
137+
if charArrDiff.size ≤ 3 || approxEditDistance ≤ maxCharDiffDistance then
138+
charArrDiff.map fun (act, cs) => (act, String.mk cs.toList)
139+
else if approxEditDistance ≤ maxWordDiffDistance then
140+
wordDiff
139141
else
140-
#[(.delete, s), (.insert, s')]
142+
maxDiff
141143
where
144+
/-
145+
Note on whitespace insertion:
146+
Because we display diffs fully inline, we must trade off between accurately rendering changes to
147+
whitespace and accurately previewing what will be inserted. We err on the side of the latter.
148+
Within a "run" of deletions or insertions, we maintain the whitespace from the deleted/inserted
149+
text and mark it as a deletion/insertion. After an unchanged word or a substitution (i.e., a
150+
deletion and insertion without an intervening unchanged word), we use the whitespace from the
151+
new version but mark it as unchanged, since there was also whitespace (of some possibly different
152+
form) here originally too. Within a substitution, we omit whitespace entirely. After an insertion,
153+
we show the new whitespace but mark it as an insertion; after a deletion, we render the old
154+
whitespace as a deletion unless it contains a newline, in which case it is omitted (as rendering a
155+
deleted newline still visually suggests a line break in the new output).
156+
-/
157+
wordDiff := Id.run do
158+
let (words, wss) := splitWords s
159+
let (words', wss') := splitWords s'
160+
let diff := Diff.diff words words'
161+
let mut withWs := #[]
162+
let mut (wssIdx, wss'Idx) := (0, 0)
163+
let mut inSubst := false
164+
for h : diffIdx in [:diff.size] do
165+
let (a₁, s₁) := diff[diffIdx]
166+
withWs := withWs.push (a₁, s₁)
167+
if let some (a₂, s₂) := diff[diffIdx + 1]? then
168+
match a₁, a₂ with
169+
| .skip, .delete =>
170+
-- Unchanged word: use new whitespace unless this is followed by a deleted terminal
171+
-- substring of the old, in which case use the old whitespace
172+
if let some ws := wss'[wss'Idx]? then
173+
withWs := withWs.push (.skip, ws)
174+
else
175+
let ws := wss[wssIdx]!
176+
withWs := withWs.push (.delete, ws)
177+
wssIdx := wssIdx + 1
178+
wss'Idx := wss'Idx + 1
179+
| .skip, .skip | .skip, .insert =>
180+
-- Unchanged word: new has white space here (use it); old does too so long as we haven't
181+
-- reached an appended terminal new portion
182+
let ws := wss'[wss'Idx]!
183+
withWs := withWs.push (.skip, ws)
184+
wssIdx := wssIdx + 1
185+
wss'Idx := wss'Idx + 1
186+
| .insert, .insert =>
187+
-- Insertion separator: include, and mark it as inserted
188+
let ws := wss'[wss'Idx]!
189+
withWs := withWs.push (.insert, ws)
190+
wss'Idx := wss'Idx + 1
191+
| .insert, .skip =>
192+
-- End of insertion: if this was a substitution, new and old have whitespace here, and we
193+
-- take the new one; if it wasn't, only new has whitespace here
194+
let ws := wss'[wss'Idx]!
195+
let act := if inSubst then .skip else .insert
196+
withWs := withWs.push (act, ws)
197+
wss'Idx := wss'Idx + 1
198+
if inSubst then wssIdx := wssIdx + 1
199+
inSubst := false
200+
| .delete, .delete =>
201+
-- Deletion separator: include and mark as deleted
202+
let ws := wss[wssIdx]!
203+
withWs := withWs.push (.delete, ws)
204+
wssIdx := wssIdx + 1
205+
| .delete, .skip =>
206+
-- End of deletion: include the deletion's whitespace as deleted iff it is not a newline
207+
-- (see earlier note); in principle, we should never have a substitution ending with a
208+
-- deletion (`diff` should prefer `a̵b̲` to `b̲a̵`), but we handle this in case `diff` changes
209+
let ws := wss[wssIdx]!
210+
unless inSubst || ws.contains '\n' do
211+
withWs := withWs.push (.delete, ws)
212+
wssIdx := wssIdx + 1
213+
if inSubst then wss'Idx := wss'Idx + 1
214+
inSubst := false
215+
| .insert, .delete | .delete, .insert =>
216+
-- "Substitution point": don't include any whitespace, since we're switching this word
217+
inSubst := true
218+
withWs
219+
|> joinEdits
220+
|>.map fun (act, ss) => (act, ss.foldl (· ++ ·) "")
221+
222+
maxDiff :=
223+
#[(.delete, s), (.insert, s')]
224+
142225
splitChars (s : String) : Array Char :=
143226
s.toList.toArray
144227

228+
splitWords (s : String) : Array String × Array String :=
229+
splitWordsAux s 0 0 #[] #[]
230+
231+
splitWordsAux (s : String) (b : String.Pos) (i : String.Pos) (r ws : Array String) : Array String × Array String :=
232+
if h : s.atEnd i then
233+
(r.push (s.extract b i), ws)
234+
else
235+
have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (String.lt_next s _)
236+
if (s.get i).isWhitespace then
237+
let skipped := (Substring.mk s i s.endPos).takeWhile (·.isWhitespace)
238+
let i' := skipped.stopPos
239+
splitWordsAux s i' i' (r.push (s.extract b i)) (ws.push (s.extract i i'))
240+
else
241+
splitWordsAux s b (s.next i) r ws
242+
145243
joinEdits {α} (ds : Array (Diff.Action × α)) : Array (Diff.Action × Array α) :=
146244
ds.foldl (init := #[]) fun acc (act, c) =>
147245
if h : acc.isEmpty then

tests/lean/run/hintSuggestionMessage.lean

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ error: Invalid
194194
195195
Hint: Use one of these instead
196196
• first second thir̵d̵ ̵f̵o̵u̵r̵t̵h̵ ̵f̵i̵f̵t̵h̵
197-
f̵i̵r̵st̵ ̵s̵econd t̵h̵i̵r̵d̵ ̵fourth fifth
197+
f̵i̵r̵s̵t̵ ̵second t̵h̵i̵r̵d̵ ̵fourth fifth
198198
• f̵i̵r̵s̵t̵ ̵s̵e̵c̵o̵n̵d̵ ̵t̵h̵i̵r̵d̵ ̵f̵o̵u̵r̵t̵h̵ ̵f̵i̵f̵t̵h̵f̲i̲ ̲s̲e̲ ̲t̲h̲ ̲f̲o̲ ̲f̲i̲
199199
• f̵i̵r̵s̵t̵ ̵s̵e̵c̵o̵n̵d̵ ̵t̵h̵i̵r̵d̵ ̵f̵o̵u̵r̵t̵h̵ ̵f̵i̵f̵t̵h̵s̲e̲c̲o̲n̲d̲
200200
-/
@@ -211,7 +211,7 @@ error: Invalid
211211
212212
Hint: Try one of these
213213
• let x := 4̵2̵
214-
̵3̲1̲;̲ ̵2̵ ̵*̵ ̵x
214+
̵ ̵ ̵2̵ ̵*̵3̲1̲;̲ x
215215
• l̵e̵t̵ ̵x̵ ̵:̵=̵ ̵4̵2̵
216216
̵ ̵ ̵2̵ ̵*̵ ̵x̵l̲e̲t̲ ̲(̲a̲,̲ ̲b̲,̲ ̲c̲)̲ ̲:̲=̲ ̲(̲1̲,̲ ̲2̲,̲ ̲3̲)̲
217217
̲ ̲ ̲b̲

tests/lean/run/hintWordDiff.lean

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import Lean.Meta.Hint
2+
3+
/-! # Word-level diffs in hint suggestions
4+
5+
Word-level diffs in hint suggestions require careful display of whitespace, since it is frequently
6+
impossible to both accurately render both deleted and inserted whitespace while also displaying a
7+
snippet that recognizably maps to what will be inserted. These tests exercise the behavior of the
8+
word-level diff generator to ensure that it is reasonable.
9+
-/
10+
11+
open Lean Meta Hint
12+
13+
-- We reproduce this function here because it is private in `Lean.Meta.Hint`
14+
private def mkDiffString (ds : Array (Diff.Action × String)) : String :=
15+
let rangeStrs := ds.map fun
16+
| (.insert, s) => String.mk (s.data.flatMap ([·, '\u0332'])) -- U+0332 Combining Low Line
17+
| (.delete, s) => String.mk (s.data.flatMap ([·, '\u0335'])) -- U+0335 Combining Short Stroke Overlay
18+
| (.skip , s) => s
19+
rangeStrs.foldl (· ++ ·) ""
20+
21+
/-- info: "one two t̲h̲r̲e̲e̲" -/
22+
#guard_msgs in
23+
#eval readableDiff.wordDiff "one two" "one two three" |> mkDiffString
24+
25+
/-- info: "o̵n̵e̵a̲ two t̲h̲r̲e̲e̲" -/
26+
#guard_msgs in
27+
#eval readableDiff.wordDiff "one\ntwo" "a two three" |> mkDiffString
28+
29+
/-- info: "o̵n̵e̵a̲\ntwo t̲h̲r̲e̲e̲" -/
30+
#guard_msgs in
31+
#eval readableDiff.wordDiff "one two" "a\ntwo three" |> mkDiffString
32+
33+
/-- info: "one two" -/
34+
#guard_msgs in
35+
#eval readableDiff.wordDiff "one\ntwo" "one two" |> mkDiffString
36+
37+
/-- info: "one t̵w̵o̵ ̵three" -/
38+
#guard_msgs in
39+
#eval readableDiff.wordDiff "one two three" "one three" |> mkDiffString
40+
41+
/-- info: "a b" -/
42+
#guard_msgs in
43+
#eval readableDiff.wordDiff "a b" "a b" |> mkDiffString
44+
45+
/-- info: "a ̵b̵" -/
46+
#guard_msgs in
47+
#eval readableDiff.wordDiff "a b" "a" |> mkDiffString
48+
49+
/-- info: "a̵b c" -/
50+
#guard_msgs in
51+
#eval readableDiff.wordDiff "a\nb c" "b c" |> mkDiffString
52+
53+
/--
54+
info: "a l̵o̵n̵g̵e̵r̵l̲o̲n̲g̲\tstring w̵i̵t̵h̵ ̵ ̵ ̵w̵h̵i̵t̵e̵s̵p̵a̵c̵e̵in strange\n\np̵l̵a̵c̵e̵s̵\n̵a̵n̵d̵ ̵u̵n̵u̵s̵u̵a̵l̵spaces"
55+
-/
56+
#guard_msgs in
57+
#eval readableDiff.wordDiff
58+
"a longer\nstring with whitespace\n in strange\nplaces and\n\nunusual\tspaces"
59+
"a long\tstring in strange\n\nspaces"
60+
|> mkDiffString
61+
62+
/--
63+
info: def f (̵x̵ ̵:̵ ̵N̵a̵t̵)̵x̲ := x + 1
64+
#check let c̵x̲ := 3̵1̵5̲ in
65+
f c̵x̲
66+
-/
67+
#guard_msgs in
68+
#eval IO.println <| readableDiff.wordDiff
69+
"def f (x : Nat) := x + 1\n#check let c := 31 in\n f c"
70+
"def f x := x + 1\n#check let x := 5 in\n f x"
71+
|> mkDiffString

0 commit comments

Comments
 (0)