Skip to content

Commit a5eb28a

Browse files
theck13 authored and dmsnell committed
Update diff match patch with decode URI and H(null)L pattern guard
1 parent 94e5da4 commit a5eb28a

File tree

1 file changed

+117
-12
lines changed

1 file changed

+117
-12
lines changed

Simperium/src/main/java/name/fraser/neil/plaintext/diff_match_patch.java

+117-12
Original file line number | Diff line number | Diff line change
@@ -1432,17 +1432,20 @@ public String diff_toDelta(LinkedList<Diff> diffs) {
14321432
char lastEnd = 0;
14331433
boolean isFirst = true;
14341434
for (Diff aDiff : diffs) {
1435+
if (aDiff.text.isEmpty()) {
1436+
continue;
1437+
}
14351438
char thisTop = aDiff.text.charAt(0);
14361439
char thisEnd = aDiff.text.charAt(aDiff.text.length() - 1);
14371440
if (Character.isHighSurrogate(thisEnd)) {
1441+
lastEnd = thisEnd;
14381442
aDiff.text = aDiff.text.substring(0, aDiff.text.length() - 1);
14391443
}
1440-
if (! isFirst && Character.isHighSurrogate(lastEnd) && Character.isLowSurrogate(thisTop)) {
1444+
if (!isFirst && Character.isHighSurrogate(lastEnd) && Character.isLowSurrogate(thisTop)) {
14411445
aDiff.text = lastEnd + aDiff.text;
14421446
}
14431447
isFirst = false;
1444-
lastEnd = thisEnd;
1445-
if ( aDiff.text.isEmpty() ) {
1448+
if (aDiff.text.isEmpty()) {
14461449
continue;
14471450
}
14481451
switch (aDiff.operation) {
@@ -1472,6 +1475,92 @@ public String diff_toDelta(LinkedList<Diff> diffs) {
14721475
return delta;
14731476
}
14741477

1478+
private int digit16(char c) throws IllegalArgumentException {
1479+
switch (c) {
1480+
case '0': return 0;
1481+
case '1': return 1;
1482+
case '2': return 2;
1483+
case '3': return 3;
1484+
case '4': return 4;
1485+
case '5': return 5;
1486+
case '6': return 6;
1487+
case '7': return 7;
1488+
case '8': return 8;
1489+
case '9': return 9;
1490+
case 'A': case 'a': return 10;
1491+
case 'B': case 'b': return 11;
1492+
case 'C': case 'c': return 12;
1493+
case 'D': case 'd': return 13;
1494+
case 'E': case 'e': return 14;
1495+
case 'F': case 'f': return 15;
1496+
default: throw new IllegalArgumentException();
1497+
}
1498+
}
1499+
1500+
private String decodeURI(String text) throws IllegalArgumentException {
1501+
int i = 0;
1502+
StringBuilder decoded = new StringBuilder(text.length());
1503+
while (i < text.length()) {
1504+
if (text.charAt(i) != '%') {
1505+
decoded.append(text.charAt(i++));
1506+
continue;
1507+
}
1508+
// start a percent-sequence
1509+
int byte1 = (digit16(text.charAt(i + 1)) << 4) + digit16(text.charAt(i + 2));
1510+
if ((byte1 & 0x80) == 0) {
1511+
decoded.append(Character.toChars(byte1));
1512+
i += 3;
1513+
continue;
1514+
}
1515+
if (text.charAt(i + 3) != '%') {
1516+
throw new IllegalArgumentException();
1517+
}
1518+
int byte2 = (digit16(text.charAt(i + 4)) << 4) + digit16(text.charAt(i + 5));
1519+
if ((byte2 & 0xC0) != 0x80) {
1520+
throw new IllegalArgumentException();
1521+
}
1522+
byte2 = byte2 & 0x3F;
1523+
if ((byte1 & 0xE0) == 0xC0) {
1524+
decoded.append(Character.toChars(((byte1 & 0x1F) << 6) | byte2));
1525+
i += 6;
1526+
continue;
1527+
}
1528+
if (text.charAt(i + 6) != '%') {
1529+
throw new IllegalArgumentException();
1530+
}
1531+
int byte3 = (digit16(text.charAt(i + 7)) << 4) + digit16(text.charAt(i + 8));
1532+
if ((byte3 & 0xC0) != 0x80) {
1533+
throw new IllegalArgumentException();
1534+
}
1535+
byte3 = byte3 & 0x3F;
1536+
if ((byte1 & 0xF0) == 0xE0) {
1537+
// unpaired surrogate are fine here
1538+
decoded.append(Character.toChars(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3));
1539+
i += 9;
1540+
continue;
1541+
}
1542+
if (text.charAt(i + 9) != '%') {
1543+
throw new IllegalArgumentException();
1544+
}
1545+
int byte4 = (digit16(text.charAt(i + 10)) << 4) + digit16(text.charAt(i + 11));
1546+
if ((byte4 & 0xC0) != 0x80) {
1547+
throw new IllegalArgumentException();
1548+
}
1549+
byte4 = byte4 & 0x3F;
1550+
if ((byte1 & 0xF8) == 0xF0) {
1551+
int codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
1552+
if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
1553+
decoded.append(Character.toChars((codePoint & 0xFFFF) >>> 10 & 0x3FF | 0xD800));
1554+
decoded.append(Character.toChars(0xDC00 | (codePoint & 0xFFFF) & 0x3FF));
1555+
i += 12;
1556+
continue;
1557+
}
1558+
}
1559+
throw new IllegalArgumentException();
1560+
}
1561+
return decoded.toString();
1562+
}
1563+
14751564
/**
14761565
* Given the original text1, and an encoded string which describes the
14771566
* operations required to transform text1 into text2, compute the full diff.
@@ -1485,7 +1574,8 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
14851574
LinkedList<Diff> diffs = new LinkedList<Diff>();
14861575
int pointer = 0; // Cursor in text1
14871576
String[] tokens = delta.split("\t");
1488-
for (String token : tokens) {
1577+
for (int x = 0; x < tokens.length; x++) {
1578+
String token = tokens[x];
14891579
if (token.length() == 0) {
14901580
// Blank tokens are ok (from a trailing \t).
14911581
continue;
@@ -1498,10 +1588,7 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
14981588
// decode would change all "+" to " "
14991589
param = param.replace("+", "%2B");
15001590
try {
1501-
param = URLDecoder.decode(param, "UTF-8");
1502-
} catch (UnsupportedEncodingException e) {
1503-
// Not likely on modern system.
1504-
throw new Error("This system does not support UTF-8.", e);
1591+
param = this.decodeURI(param);
15051592
} catch (IllegalArgumentException e) {
15061593
// Malformed URI sequence.
15071594
throw new IllegalArgumentException(
@@ -1524,6 +1611,27 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
15241611
"Negative number in diff_fromDelta: " + param);
15251612
}
15261613
String text;
1614+
// some objective-c versions of the library produced patches with
1615+
// (null) in the place where surrogates were split across diff
1616+
// boundaries. if we leave those in we'll be stuck with a
1617+
// high-surrogate (null) low-surrogate pattern that will break
1618+
// deeper in the library or consuming application. we'll "fix"
1619+
// these by dropping the (null) and re-joining the surrogate halves
1620+
if (x + 2 < tokens.length &&
1621+
Character.isHighSurrogate(text1.charAt(pointer + n - 1)) &&
1622+
tokens[x + 1].substring(1).equals("(null)") &&
1623+
Character.isLowSurrogate(text1.charAt(pointer + n))) {
1624+
n -= 1;
1625+
tokens[x + 1] = "+";
1626+
int m;
1627+
try {
1628+
m = Integer.parseInt(tokens[x + 2].substring(1));
1629+
} catch (NumberFormatException e) {
1630+
throw new IllegalArgumentException(
1631+
"Invalid number in diff_fromDelta: " + tokens[x + 2].substring(1), e);
1632+
}
1633+
tokens[x + 2] = tokens[x + 2].charAt(0) + String.valueOf(m + 1);
1634+
}
15271635
try {
15281636
text = text1.substring(pointer, pointer += n);
15291637
} catch (StringIndexOutOfBoundsException e) {
@@ -2284,10 +2392,7 @@ public List<Patch> patch_fromText(String textline)
22842392
line = text.getFirst().substring(1);
22852393
line = line.replace("+", "%2B"); // decode would change all "+" to " "
22862394
try {
2287-
line = URLDecoder.decode(line, "UTF-8");
2288-
} catch (UnsupportedEncodingException e) {
2289-
// Not likely on modern system.
2290-
throw new Error("This system does not support UTF-8.", e);
2395+
line = this.decodeURI(line);
22912396
} catch (IllegalArgumentException e) {
22922397
// Malformed URI sequence.
22932398
throw new IllegalArgumentException(

0 commit comments

Comments
 (0)