@@ -121,27 +121,125 @@ Guarantees that all actions in the output will be maximally grouped; that is, in
121121`#[(.insert, "a"), (.insert, "b")]`, it will return `#[(.insert, "ab")]`.
122122-/
123123partial def readableDiff (s s' : String) : Array (Diff.Action × String) :=
124- -- TODO: add additional diff granularities
125124 let minLength := min s.length s'.length
126- -- The coefficient on this value can be tuned:
127- let maxCharDiffDistance := minLength
125+ -- The coefficient on these values can be tuned:
126+ let maxCharDiffDistance := minLength / 2
127+ let maxWordDiffDistance := minLength + (max s.length s'.length - minLength) / 2
128128
129- let charDiff := Diff.diff (splitChars s) (splitChars s')
129+ let charDiffRaw := Diff.diff (splitChars s) (splitChars s')
130130 -- Note: this is just a rough heuristic, since the diff has no notion of substitution
131- let approxEditDistance := charDiff.filter (·.1 != .skip) |>.size
132- let preStrDiff := joinEdits charDiff
131+ let approxEditDistance := charDiffRaw.filter (·.1 != .skip) |>.size
132+ let charArrDiff := joinEdits charDiffRaw
133+
133134 -- Given that `Diff.diff` returns a minimal diff, any length-≤3 diff can only have edits at the
134135 -- front and back, or at a single interior point. This will always be fairly readable (and
135- -- splitting by a larger unit would likely only be worse). Otherwise, we should only use the
136- -- per-character diff if the edit distance isn't so large that it will be hard to read:
137- if preStrDiff.size ≤ 3 || approxEditDistance ≤ maxCharDiffDistance then
138- preStrDiff.map fun (act, cs) => (act, String.mk cs.toList)
136+ -- splitting by a larger unit would likely only be worse)
137+ if charArrDiff.size ≤ 3 || approxEditDistance ≤ maxCharDiffDistance then
138+ charArrDiff.map fun (act, cs) => (act, String.mk cs.toList)
139+ else if approxEditDistance ≤ maxWordDiffDistance then
140+ wordDiff
139141 else
140- #[(.delete, s), (.insert, s')]
142+ maxDiff
141143where
144+ /-
145+ Note on whitespace insertion:
146+ Because we display diffs fully inline, we must trade off between accurately rendering changes to
147+ whitespace and accurately previewing what will be inserted. We err on the side of the latter.
148+ Within a "run" of deletions or insertions, we maintain the whitespace from the deleted/inserted
149+ text and mark it as a deletion/insertion. After an unchanged word or a substitution (i.e., a
150+ deletion and insertion without an intervening unchanged word), we use the whitespace from the
151+ new version but mark it as unchanged, since there was also whitespace (of some possibly different
152+ form) here originally too. Within a substitution, we omit whitespace entirely. After an insertion,
153+ we show the new whitespace but mark it as an insertion; after a deletion, we render the old
154+ whitespace as a deletion unless it contains a newline, in which case it is omitted (as rendering a
155+ deleted newline still visually suggests a line break in the new output).
156+ -/
157+ wordDiff := Id.run do
158+ let (words, wss) := splitWords s
159+ let (words', wss') := splitWords s'
160+ let diff := Diff.diff words words'
161+ let mut withWs := #[]
162+ let mut (wssIdx, wss'Idx) := (0 , 0 )
163+ let mut inSubst := false
164+ for h : diffIdx in [:diff.size] do
165+ let (a₁, s₁) := diff[diffIdx]
166+ withWs := withWs.push (a₁, s₁)
167+ if let some (a₂, s₂) := diff[diffIdx + 1 ]? then
168+ match a₁, a₂ with
169+ | .skip, .delete =>
170+ -- Unchanged word: use new whitespace unless this is followed by a deleted terminal
171+ -- substring of the old, in which case use the old whitespace
172+ if let some ws := wss'[wss'Idx]? then
173+ withWs := withWs.push (.skip, ws)
174+ else
175+ let ws := wss[wssIdx]!
176+ withWs := withWs.push (.delete, ws)
177+ wssIdx := wssIdx + 1
178+ wss'Idx := wss'Idx + 1
179+ | .skip, .skip | .skip, .insert =>
180+ -- Unchanged word: new has white space here (use it); old does too so long as we haven't
181+ -- reached an appended terminal new portion
182+ let ws := wss'[wss'Idx]!
183+ withWs := withWs.push (.skip, ws)
184+ wssIdx := wssIdx + 1
185+ wss'Idx := wss'Idx + 1
186+ | .insert, .insert =>
187+ -- Insertion separator: include, and mark it as inserted
188+ let ws := wss'[wss'Idx]!
189+ withWs := withWs.push (.insert, ws)
190+ wss'Idx := wss'Idx + 1
191+ | .insert, .skip =>
192+ -- End of insertion: if this was a substitution, new and old have whitespace here, and we
193+ -- take the new one; if it wasn't, only new has whitespace here
194+ let ws := wss'[wss'Idx]!
195+ let act := if inSubst then .skip else .insert
196+ withWs := withWs.push (act, ws)
197+ wss'Idx := wss'Idx + 1
198+ if inSubst then wssIdx := wssIdx + 1
199+ inSubst := false
200+ | .delete, .delete =>
201+ -- Deletion separator: include and mark as deleted
202+ let ws := wss[wssIdx]!
203+ withWs := withWs.push (.delete, ws)
204+ wssIdx := wssIdx + 1
205+ | .delete, .skip =>
206+ -- End of deletion: include the deletion's whitespace as deleted iff it is not a newline
207+ -- (see earlier note); in principle, we should never have a substitution ending with a
208+ -- deletion (`diff` should prefer `a̵b̲` to `b̲a̵`), but we handle this in case `diff` changes
209+ let ws := wss[wssIdx]!
210+ unless inSubst || ws.contains '\n ' do
211+ withWs := withWs.push (.delete, ws)
212+ wssIdx := wssIdx + 1
213+ if inSubst then wss'Idx := wss'Idx + 1
214+ inSubst := false
215+ | .insert, .delete | .delete, .insert =>
216+ -- "Substitution point": don't include any whitespace, since we're switching this word
217+ inSubst := true
218+ withWs
219+ |> joinEdits
220+ |>.map fun (act, ss) => (act, ss.foldl (· ++ ·) "" )
221+
222+ maxDiff :=
223+ #[(.delete, s), (.insert, s')]
224+
142225 splitChars (s : String) : Array Char :=
143226 s.toList.toArray
144227
228+ splitWords (s : String) : Array String × Array String :=
229+ splitWordsAux s 0 0 #[] #[]
230+
231+ splitWordsAux (s : String) (b : String.Pos) (i : String.Pos) (r ws : Array String) : Array String × Array String :=
232+ if h : s.atEnd i then
233+ (r.push (s.extract b i), ws)
234+ else
235+ have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (String.lt_next s _)
236+ if (s.get i).isWhitespace then
237+ let skipped := (Substring.mk s i s.endPos).takeWhile (·.isWhitespace)
238+ let i' := skipped.stopPos
239+ splitWordsAux s i' i' (r.push (s.extract b i)) (ws.push (s.extract i i'))
240+ else
241+ splitWordsAux s b (s.next i) r ws
242+
145243 joinEdits {α} (ds : Array (Diff.Action × α)) : Array (Diff.Action × Array α) :=
146244 ds.foldl (init := #[]) fun acc (act, c) =>
147245 if h : acc.isEmpty then
0 commit comments