Skip to content

Commit 846f6c3

Browse files
committed
1 parent 7c25660 commit 846f6c3

File tree

1 file changed

+153
-5
lines changed

1 file changed

+153
-5
lines changed

js/diff_match_patch_uncompressed.js

Lines changed: 153 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1339,6 +1339,15 @@ diff_match_patch.prototype.diff_levenshtein = function(diffs) {
13391339
return levenshtein;
13401340
};
13411341

1342+
/**
 * Test whether a string begins with a UTF-16 high (leading) surrogate.
 * Only the first code unit of the argument is inspected.
 * @param {string} c String whose first code unit is tested.
 * @return {boolean} True if the first code unit lies in 0xD800-0xDBFF.
 *     An empty string yields false because charCodeAt(0) is then NaN.
 */
diff_match_patch.prototype.isHighSurrogate = function(c) {
  var v = c.charCodeAt(0);
  return v >= 0xD800 && v <= 0xDBFF;
};
1346+
1347+
/**
 * Test whether a string begins with a UTF-16 low (trailing) surrogate.
 * Only the first code unit of the argument is inspected.
 * @param {string} c String whose first code unit is tested.
 * @return {boolean} True if the first code unit lies in 0xDC00-0xDFFF.
 *     An empty string yields false because charCodeAt(0) is then NaN.
 */
diff_match_patch.prototype.isLowSurrogate = function(c) {
  var v = c.charCodeAt(0);
  return v >= 0xDC00 && v <= 0xDFFF;
};
13421351

13431352
/**
13441353
* Crush the diff into an encoded string which describes the operations
@@ -1350,22 +1359,161 @@ diff_match_patch.prototype.diff_levenshtein = function(diffs) {
13501359
*/
13511360
diff_match_patch.prototype.diff_toDelta = function(diffs) {
  // Each diff becomes one tab-separated token: '+' followed by the
  // URI-encoded text for an insertion, '-' or '=' followed by a length in
  // UTF-16 code units for a deletion or an equality.
  var text = [];
  // Most recently trimmed trailing high surrogate, waiting to be re-attached
  // to the following diff(s) that begin with a low surrogate.
  var lastEnd;
  for (var x = 0; x < diffs.length; x++) {
    var op = diffs[x][0];
    // Work on a local copy of the text: the caller's diff tuples must not be
    // modified by serialising them.
    var thisText = diffs[x][1];

    if (0 === thisText.length) {
      continue;
    }

    // Both are taken from the unmodified text of this diff.
    var thisTop = thisText[0];
    var thisEnd = thisText[thisText.length - 1];

    // trap a trailing high-surrogate so we can
    // distribute it to the successive edits
    if (this.isHighSurrogate(thisEnd)) {
      lastEnd = thisEnd;
      thisText = thisText.slice(0, -1);
    }

    // A diff starting on a low surrogate was split in the middle of a
    // surrogate pair; give it the trapped leading half so that encodeURI()
    // sees a complete pair.
    // NOTE(review): a trapped surrogate that is never followed by a diff
    // starting with a low surrogate (e.g. one at the very end of the list)
    // is not re-attached anywhere, so the emitted lengths then fall one
    // short of the source text - confirm whether callers can hit this.
    if (lastEnd && this.isHighSurrogate(lastEnd) &&
        this.isLowSurrogate(thisTop)) {
      thisText = lastEnd + thisText;
    }

    // Trimming may have consumed the whole diff (a lone high surrogate).
    if (0 === thisText.length) {
      continue;
    }

    switch (op) {
      case DIFF_INSERT:
        text.push('+' + encodeURI(thisText));
        break;
      case DIFF_DELETE:
        text.push('-' + thisText.length);
        break;
      case DIFF_EQUAL:
        text.push('=' + thisText.length);
        break;
    }
  }
  // encodeURI() escapes spaces; the delta format keeps them literal.
  return text.join('\t').replace(/%20/g, ' ');
};
13681401

1402+
/**
 * Convert a single hexadecimal digit character to its numeric value.
 * @param {string} c One character, '0'-'9', 'a'-'f' or 'A'-'F'.
 * @return {number} The value of the digit, 0 through 15.
 * @throws {Error} If c is anything other than a single hex digit character.
 */
diff_match_patch.prototype.digit16 = function(c) {
  // Anything that is not exactly one character can never be a hex digit.
  var isSingleChar = typeof c === 'string' && c.length === 1;
  // The position inside the digit string is the digit's value.
  var value = isSingleChar ? '0123456789abcdef'.indexOf(c.toLowerCase()) : -1;
  if (value === -1) {
    throw new Error('Invalid hex-code');
  }
  return value;
};
1423+
1424+
/**
 * Decode URI-encoded string but allow for encoded surrogate halves
 *
 * diff_match_patch needs this relaxation of the requirements because
 * not all libraries and versions produce valid URI strings in toDelta
 * and we don't want to crash this code when the input is valid input
 * but at the same time invalid utf-8
 *
 * The native decodeURI() is tried first; only when it throws is the text
 * decoded by hand. Unlike the native function, the hand-written path decodes
 * every single-byte escape, including reserved characters such as %3A.
 *
 * @example: decodeURI( 'abcd%20%F0%9F%85%B0' ) = 'abcd \ud83c\udd70'
 * @example: decodeURI( 'abcd%3A %ED%A0%BC' ) = 'abcd: \ud83c'
 *
 * @cite: @mathiasbynens utf8.js at https://github.com/mathiasbynens/utf8.js
 *
 * @param {String} text input string encoded by encodeURI() or equivalent
 * @return {String}
 * @throws {URIError} If a percent-sequence is not a 1-4 byte UTF-8 sequence.
 * @throws {Error} If a percent sign is not followed by two hex digits
 *     (raised by digit16).
 */
diff_match_patch.prototype.decodeURI = function(text) {
  try {
    return decodeURI(text);
  } catch (e) {
    // The native decoder rejected the text (for example because of an
    // encoded lone surrogate), so decode it byte by byte instead.
    var i = 0;
    var decoded = '';

    while (i < text.length) {
      if (text[i] !== '%') {
        decoded += text[i++];
        continue;
      }

      // start a percent-sequence
      var byte1 = (this.digit16(text[i + 1]) << 4) + this.digit16(text[i + 2]);
      if ((byte1 & 0x80) === 0) {
        // 0xxxxxxx: single-byte (ASCII) character
        decoded += String.fromCharCode(byte1);
        i += 3;
        continue;
      }

      if ('%' !== text[i + 3]) {
        throw new URIError('URI malformed');
      }

      var byte2 = (this.digit16(text[i + 4]) << 4) + this.digit16(text[i + 5]);
      if ((byte2 & 0xC0) !== 0x80) {
        // every continuation byte must look like 10xxxxxx
        throw new URIError('URI malformed');
      }
      byte2 = byte2 & 0x3F;
      if ((byte1 & 0xE0) === 0xC0) {
        // 110xxxxx 10xxxxxx: two-byte sequence
        decoded += String.fromCharCode(((byte1 & 0x1F) << 6) | byte2);
        i += 6;
        continue;
      }

      if ('%' !== text[i + 6]) {
        throw new URIError('URI malformed');
      }

      var byte3 = (this.digit16(text[i + 7]) << 4) + this.digit16(text[i + 8]);
      if ((byte3 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte3 = byte3 & 0x3F;
      if ((byte1 & 0xF0) === 0xE0) {
        // 1110xxxx 10xxxxxx 10xxxxxx: three-byte sequence
        // unpaired surrogate are fine here
        decoded += String.fromCharCode(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3);
        i += 9;
        continue;
      }

      if ('%' !== text[i + 9]) {
        throw new URIError('URI malformed');
      }

      var byte4 = (this.digit16(text[i + 10]) << 4) + this.digit16(text[i + 11]);
      if ((byte4 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte4 = byte4 & 0x3F;
      if ((byte1 & 0xF8) === 0xF0) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four-byte sequence
        var codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
        if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
          // UTF-16 stores supplementary code points as (codePoint - 0x10000)
          // split into two 10-bit halves. Masking with 0xFFFF instead of
          // subtracting is only correct for plane 1.
          var offset = codePoint - 0x10000;
          decoded += String.fromCharCode(0xD800 | (offset >>> 10));
          decoded += String.fromCharCode(0xDC00 | (offset & 0x3FF));
          i += 12;
          continue;
        }
      }

      throw new URIError('URI malformed');
    }

    return decoded;
  }
};
13691517

13701518
/**
13711519
* Given the original text1, and an encoded string which describes the
@@ -1388,7 +1536,7 @@ diff_match_patch.prototype.diff_fromDelta = function(text1, delta) {
13881536
case '+':
13891537
try {
13901538
diffs[diffsLength++] =
1391-
new diff_match_patch.Diff(DIFF_INSERT, decodeURI(param));
1539+
new diff_match_patch.Diff(DIFF_INSERT, this.decodeURI(param));
13921540
} catch (ex) {
13931541
// Malformed URI sequence.
13941542
throw new Error('Illegal escape in diff_fromDelta: ' + param);

0 commit comments

Comments
 (0)