@@ -1432,17 +1432,20 @@ public String diff_toDelta(LinkedList<Diff> diffs) {
1432
1432
char lastEnd = 0 ;
1433
1433
boolean isFirst = true ;
1434
1434
for (Diff aDiff : diffs ) {
1435
+ if (aDiff .text .isEmpty ()) {
1436
+ continue ;
1437
+ }
1435
1438
char thisTop = aDiff .text .charAt (0 );
1436
1439
char thisEnd = aDiff .text .charAt (aDiff .text .length () - 1 );
1437
1440
if (Character .isHighSurrogate (thisEnd )) {
1441
+ lastEnd = thisEnd ;
1438
1442
aDiff .text = aDiff .text .substring (0 , aDiff .text .length () - 1 );
1439
1443
}
1440
- if (! isFirst && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
1444
+ if (!isFirst && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
1441
1445
aDiff .text = lastEnd + aDiff .text ;
1442
1446
}
1443
1447
isFirst = false ;
1444
- lastEnd = thisEnd ;
1445
- if ( aDiff .text .isEmpty () ) {
1448
+ if (aDiff .text .isEmpty ()) {
1446
1449
continue ;
1447
1450
}
1448
1451
switch (aDiff .operation ) {
@@ -1472,6 +1475,92 @@ public String diff_toDelta(LinkedList<Diff> diffs) {
1472
1475
return delta ;
1473
1476
}
1474
1477
1478
+ private int digit16 (char c ) throws IllegalArgumentException {
1479
+ switch (c ) {
1480
+ case '0' : return 0 ;
1481
+ case '1' : return 1 ;
1482
+ case '2' : return 2 ;
1483
+ case '3' : return 3 ;
1484
+ case '4' : return 4 ;
1485
+ case '5' : return 5 ;
1486
+ case '6' : return 6 ;
1487
+ case '7' : return 7 ;
1488
+ case '8' : return 8 ;
1489
+ case '9' : return 9 ;
1490
+ case 'A' : case 'a' : return 10 ;
1491
+ case 'B' : case 'b' : return 11 ;
1492
+ case 'C' : case 'c' : return 12 ;
1493
+ case 'D' : case 'd' : return 13 ;
1494
+ case 'E' : case 'e' : return 14 ;
1495
+ case 'F' : case 'f' : return 15 ;
1496
+ default : throw new IllegalArgumentException ();
1497
+ }
1498
+ }
1499
+
1500
+ private String decodeURI (String text ) throws IllegalArgumentException {
1501
+ int i = 0 ;
1502
+ StringBuilder decoded = new StringBuilder (text .length ());
1503
+ while (i < text .length ()) {
1504
+ if (text .charAt (i ) != '%' ) {
1505
+ decoded .append (text .charAt (i ++));
1506
+ continue ;
1507
+ }
1508
+ // start a percent-sequence
1509
+ int byte1 = (digit16 (text .charAt (i + 1 )) << 4 ) + digit16 (text .charAt (i + 2 ));
1510
+ if ((byte1 & 0x80 ) == 0 ) {
1511
+ decoded .append (Character .toChars (byte1 ));
1512
+ i += 3 ;
1513
+ continue ;
1514
+ }
1515
+ if (text .charAt (i + 3 ) != '%' ) {
1516
+ throw new IllegalArgumentException ();
1517
+ }
1518
+ int byte2 = (digit16 (text .charAt (i + 4 )) << 4 ) + digit16 (text .charAt (i + 5 ));
1519
+ if ((byte2 & 0xC0 ) != 0x80 ) {
1520
+ throw new IllegalArgumentException ();
1521
+ }
1522
+ byte2 = byte2 & 0x3F ;
1523
+ if ((byte1 & 0xE0 ) == 0xC0 ) {
1524
+ decoded .append (Character .toChars (((byte1 & 0x1F ) << 6 ) | byte2 ));
1525
+ i += 6 ;
1526
+ continue ;
1527
+ }
1528
+ if (text .charAt (i + 6 ) != '%' ) {
1529
+ throw new IllegalArgumentException ();
1530
+ }
1531
+ int byte3 = (digit16 (text .charAt (i + 7 )) << 4 ) + digit16 (text .charAt (i + 8 ));
1532
+ if ((byte3 & 0xC0 ) != 0x80 ) {
1533
+ throw new IllegalArgumentException ();
1534
+ }
1535
+ byte3 = byte3 & 0x3F ;
1536
+ if ((byte1 & 0xF0 ) == 0xE0 ) {
1537
+ // unpaired surrogate are fine here
1538
+ decoded .append (Character .toChars (((byte1 & 0x0F ) << 12 ) | (byte2 << 6 ) | byte3 ));
1539
+ i += 9 ;
1540
+ continue ;
1541
+ }
1542
+ if (text .charAt (i + 9 ) != '%' ) {
1543
+ throw new IllegalArgumentException ();
1544
+ }
1545
+ int byte4 = (digit16 (text .charAt (i + 10 )) << 4 ) + digit16 (text .charAt (i + 11 ));
1546
+ if ((byte4 & 0xC0 ) != 0x80 ) {
1547
+ throw new IllegalArgumentException ();
1548
+ }
1549
+ byte4 = byte4 & 0x3F ;
1550
+ if ((byte1 & 0xF8 ) == 0xF0 ) {
1551
+ int codePoint = ((byte1 & 0x07 ) << 0x12 ) | (byte2 << 0x0C ) | (byte3 << 0x06 ) | byte4 ;
1552
+ if (codePoint >= 0x010000 && codePoint <= 0x10FFFF ) {
1553
+ decoded .append (Character .toChars ((codePoint & 0xFFFF ) >>> 10 & 0x3FF | 0xD800 ));
1554
+ decoded .append (Character .toChars (0xDC00 | (codePoint & 0xFFFF ) & 0x3FF ));
1555
+ i += 12 ;
1556
+ continue ;
1557
+ }
1558
+ }
1559
+ throw new IllegalArgumentException ();
1560
+ }
1561
+ return decoded .toString ();
1562
+ }
1563
+
1475
1564
/**
1476
1565
* Given the original text1, and an encoded string which describes the
1477
1566
* operations required to transform text1 into text2, compute the full diff.
@@ -1485,7 +1574,8 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
1485
1574
LinkedList <Diff > diffs = new LinkedList <Diff >();
1486
1575
int pointer = 0 ; // Cursor in text1
1487
1576
String [] tokens = delta .split ("\t " );
1488
- for (String token : tokens ) {
1577
+ for (int x = 0 ; x < tokens .length ; x ++) {
1578
+ String token = tokens [x ];
1489
1579
if (token .length () == 0 ) {
1490
1580
// Blank tokens are ok (from a trailing \t).
1491
1581
continue ;
@@ -1498,10 +1588,7 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
1498
1588
// decode would change all "+" to " "
1499
1589
param = param .replace ("+" , "%2B" );
1500
1590
try {
1501
- param = URLDecoder .decode (param , "UTF-8" );
1502
- } catch (UnsupportedEncodingException e ) {
1503
- // Not likely on modern system.
1504
- throw new Error ("This system does not support UTF-8." , e );
1591
+ param = this .decodeURI (param );
1505
1592
} catch (IllegalArgumentException e ) {
1506
1593
// Malformed URI sequence.
1507
1594
throw new IllegalArgumentException (
@@ -1524,6 +1611,27 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
1524
1611
"Negative number in diff_fromDelta: " + param );
1525
1612
}
1526
1613
String text ;
1614
+ // some objective-c versions of the library produced patches with
1615
+ // (null) in the place where surrogates were split across diff
1616
+ // boundaries. if we leave those in we'll be stuck with a
1617
+ // high-surrogate (null) low-surrogate pattern that will break
1618
+ // deeper in the library or consuming application. we'll "fix"
1619
+ // these by dropping the (null) and re-joining the surrogate halves
1620
+ if (x + 2 < tokens .length &&
1621
+ Character .isHighSurrogate (text1 .charAt (pointer + n - 1 )) &&
1622
+ tokens [x + 1 ].substring (1 ).equals ("(null)" ) &&
1623
+ Character .isLowSurrogate (text1 .charAt (pointer + n ))) {
1624
+ n -= 1 ;
1625
+ tokens [x + 1 ] = "+" ;
1626
+ int m ;
1627
+ try {
1628
+ m = Integer .parseInt (tokens [x + 2 ].substring (1 ));
1629
+ } catch (NumberFormatException e ) {
1630
+ throw new IllegalArgumentException (
1631
+ "Invalid number in diff_fromDelta: " + tokens [x + 2 ].substring (1 ), e );
1632
+ }
1633
+ tokens [x + 2 ] = tokens [x + 2 ].charAt (0 ) + String .valueOf (m + 1 );
1634
+ }
1527
1635
try {
1528
1636
text = text1 .substring (pointer , pointer += n );
1529
1637
} catch (StringIndexOutOfBoundsException e ) {
@@ -2284,10 +2392,7 @@ public List<Patch> patch_fromText(String textline)
2284
2392
line = text .getFirst ().substring (1 );
2285
2393
line = line .replace ("+" , "%2B" ); // decode would change all "+" to " "
2286
2394
try {
2287
- line = URLDecoder .decode (line , "UTF-8" );
2288
- } catch (UnsupportedEncodingException e ) {
2289
- // Not likely on modern system.
2290
- throw new Error ("This system does not support UTF-8." , e );
2395
+ line = this .decodeURI (line );
2291
2396
} catch (IllegalArgumentException e ) {
2292
2397
// Malformed URI sequence.
2293
2398
throw new IllegalArgumentException (
0 commit comments