diff --git a/javascript/diff_match_patch_uncompressed.js b/javascript/diff_match_patch_uncompressed.js
index f9fa82bf..940b6e1c 100644
--- a/javascript/diff_match_patch_uncompressed.js
+++ b/javascript/diff_match_patch_uncompressed.js
@@ -22,6 +22,16 @@
* @author fraser@google.com (Neil Fraser)
*/
+/**
+ * Determine if the index is inside a surrogate pair (the code unit at idx is a low surrogate).
+ * @param {string} str The string
+ * @param {number} idx The index
+ */
+function insideSurrogate(str, idx) {
+ var code = str.charCodeAt(idx);  // NaN when idx is out of range, which fails both comparisons.
+ return code >= 0xDC00 && code <= 0xDFFF;  // True only for a low (trailing) surrogate code unit.
+}
+
/**
* Class containing the diff, match and patch methods.
* @constructor
@@ -361,6 +371,11 @@ diff_match_patch.prototype.diff_bisect_ = function(text1, text2, deadline) {
x1++;
y1++;
}
+ if (insideSurrogate(text1, x1)) {
+ x1--;
+ y1--;
+ }
+
v1[k1_offset] = x1;
if (x1 > text1_length) {
// Ran off the right of the graph.
@@ -569,6 +584,9 @@ diff_match_patch.prototype.diff_commonPrefix = function(text1, text2) {
}
pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin);
}
+ if (insideSurrogate(text1, pointermid)) {
+ pointermid--;
+ }
return pointermid;
};
@@ -601,6 +619,9 @@ diff_match_patch.prototype.diff_commonSuffix = function(text1, text2) {
}
pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin);
}
+ if (insideSurrogate(text1, text1.length - pointermid)) {
+ pointermid--;
+ }
return pointermid;
};
diff --git a/javascript/tests/diff_match_patch_test.html b/javascript/tests/diff_match_patch_test.html
index 46617309..3b738b49 100644
--- a/javascript/tests/diff_match_patch_test.html
+++ b/javascript/tests/diff_match_patch_test.html
@@ -113,6 +113,7 @@
'testPatchObj',
'testPatchFromText',
'testPatchToText',
+ 'testPatchSurrogates',
'testPatchAddContext',
'testPatchMake',
'testPatchSplitMax',
diff --git a/javascript/tests/diff_match_patch_test.js b/javascript/tests/diff_match_patch_test.js
index 109e56ad..6b74e87f 100644
--- a/javascript/tests/diff_match_patch_test.js
+++ b/javascript/tests/diff_match_patch_test.js
@@ -767,6 +767,57 @@ function testPatchToText() {
strp = '@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n';
p = dmp.patch_fromText(strp);
assertEquals(strp, dmp.patch_toText(p));
+
+}
+
+function testPatchSurrogates() {
+ var p, p2, strp;  // p: result of patch_make, strp: patch_toText(p), p2: patch_fromText(strp).
+
+ // These share the same high surrogate prefix (both start with code unit 0xD83C)
+ p = dmp.patch_make('\u{1F30D}', '\u{1F308}');
+ strp = dmp.patch_toText(p);
+ p2 = dmp.patch_fromText(strp);
+ assertEquivalent(p, p2);
+
+ // These share the same low surrogate suffix (both end with code unit 0xDD20)
+ p = dmp.patch_make('\u{10120}', '\u{10520}');
+ strp = dmp.patch_toText(p);
+ p2 = dmp.patch_fromText(strp);
+ assertEquivalent(p, p2);
+
+ // No common prefix, but later there's the same high surrogate char
+ p = dmp.patch_make('abbb\u{1F30D}', 'cbbb\u{1F308}');
+ strp = dmp.patch_toText(p);
+ p2 = dmp.patch_fromText(strp);
+ assertEquivalent(p, p2);
+
+ // No common suffix, but earlier there's the same low surrogate char
+ p = dmp.patch_make('\u{10120}aaac', '\u{10520}aaab');
+ strp = dmp.patch_toText(p);
+ p2 = dmp.patch_fromText(strp);
+ assertEquivalent(p, p2);
+
+ // No common prefix or suffix; the same low surrogate char appears at different offsets
+ p = dmp.patch_make('abbb\u{10120}aaac', '\u{10520}aaab');
+ strp = dmp.patch_toText(p);
+ p2 = dmp.patch_fromText(strp);
+ assertEquivalent(p, p2);
+
+ var padding1 = "";
+ while (padding1.length < 100) {
+ padding1 += String.fromCharCode(50 + padding1.length);  // code units 50..149
+ }
+
+ var padding2 = "";
+ while (padding2.length < 100) {
+ padding2 += String.fromCharCode(200 + padding2.length);  // code units 200..299
+ }
+
+ // Surround the pairs with long, distinct padding (deterministic, not random), swapped between the texts
+ p = dmp.patch_make(padding1+'\u{10120}'+padding2, padding2+'\u{10520}'+padding1);
+ strp = dmp.patch_toText(p);
+ p2 = dmp.patch_fromText(strp);
+ assertEquivalent(p, p2);
}
function testPatchAddContext() {