diff --git a/AUTHORS b/AUTHORS index c82809e7..90c65e95 100644 --- a/AUTHORS +++ b/AUTHORS @@ -7,4 +7,4 @@ Duncan Cross (Lua port) Jan Weiß (Objective C port) Matthaeus G. Chajdas (C# port) Mike Slemmer (C++ port) - +Scott Aron Bloom (C++11 port-remove Qt dependency) diff --git a/cpp/diff_match_patch.cpp b/cpp/diff_match_patch.cpp index 64f270c3..3e93ac95 100644 --- a/cpp/diff_match_patch.cpp +++ b/cpp/diff_match_patch.cpp @@ -23,50 +23,52 @@ #include #include "diff_match_patch.h" - ////////////////////////// // // Diff Class // ////////////////////////// - /** * Constructor. Initializes the diff with the provided values. * @param operation One of INSERT, DELETE or EQUAL * @param text The text being applied */ -Diff::Diff(Operation _operation, const QString &_text) : - operation(_operation), text(_text) { - // Construct a diff with the specified operation and text. +Diff::Diff( Operation _operation, const QString &_text ) : + operation( _operation ), + text( _text ) +{ + // Construct a diff with the specified operation and text. } -Diff::Diff() { +Diff::Diff() +{ } - -QString Diff::strOperation(Operation op) { - switch (op) { - case INSERT: - return "INSERT"; - case DELETE: - return "DELETE"; - case EQUAL: - return "EQUAL"; - } - throw "Invalid operation."; +QString Diff::strOperation( Operation op ) +{ + switch ( op ) + { + case INSERT: + return "INSERT"; + case DELETE: + return "DELETE"; + case EQUAL: + return "EQUAL"; + } + throw "Invalid operation."; } /** * Display a human-readable version of this Diff. * @return text version */ -QString Diff::toString() const { - QString prettyText = text; - // Replace linebreaks with Pilcrow signs. - prettyText.replace('\n', L'\u00b6'); - return QString("Diff(") + strOperation(operation) + QString(",\"") - + prettyText + QString("\")"); +QString Diff::toString() const +{ + QString prettyText = text; + // Replace linebreaks with Pilcrow signs. 
+ prettyText.replace( '\n', L'\u00b6' ); + return QString( "Diff(" ) + strOperation( operation ) + QString( ",\"" ) + prettyText + QString( "\")" ); } /** @@ -74,86 +76,97 @@ QString Diff::toString() const { * @param d Another Diff to compare against * @return true or false */ -bool Diff::operator==(const Diff &d) const { - return (d.operation == this->operation) && (d.text == this->text); +bool Diff::operator==( const Diff &d ) const +{ + return ( d.operation == this->operation ) && ( d.text == this->text ); } -bool Diff::operator!=(const Diff &d) const { - return !(operator == (d)); +bool Diff::operator!=( const Diff &d ) const +{ + return !( operator==( d ) ); } - ///////////////////////////////////////////// // // Patch Class // ///////////////////////////////////////////// - /** * Constructor. Initializes with an empty list of diffs. */ Patch::Patch() : - start1(0), start2(0), - length1(0), length2(0) { + start1( 0 ), + start2( 0 ), + length1( 0 ), + length2( 0 ) +{ } -bool Patch::isNull() const { - if (start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 - && diffs.size() == 0) { - return true; - } - return false; +bool Patch::isNull() const +{ + if ( start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 && diffs.size() == 0 ) + { + return true; + } + return false; } - /** * Emulate GNU diff's format. * Header: @@ -382,8 +481,9 @@ * Indices are printed as 1-based, not 0-based. 
* @return The GNU diff string */ -QString Patch::toString() { - QString coords1, coords2; - if (length1 == 0) { - coords1 = QString::number(start1) + QString(",0"); - } else if (length1 == 1) { - coords1 = QString::number(start1 + 1); - } else { - coords1 = QString::number(start1 + 1) + QString(",") - + QString::number(length1); - } - if (length2 == 0) { - coords2 = QString::number(start2) + QString(",0"); - } else if (length2 == 1) { - coords2 = QString::number(start2 + 1); - } else { - coords2 = QString::number(start2 + 1) + QString(",") - + QString::number(length2); - } - QString text; - text = QString("@@ -") + coords1 + QString(" +") + coords2 - + QString(" @@\n"); - // Escape the body of the patch with %xx notation. - foreach (Diff aDiff, diffs) { - switch (aDiff.operation) { - case INSERT: - text += QString('+'); - break; - case DELETE: - text += QString('-'); - break; - case EQUAL: - text += QString(' '); - break; - } - text += QString(QUrl::toPercentEncoding(aDiff.text, " !~*'();/?:@&=+$,#")) - + QString("\n"); - } - - return text; -} +QString Patch::toString() +{ + QString coords1, coords2; + if ( length1 == 0 ) + { + coords1 = QString::number( start1 ) + QString( ",0" ); + } + else if ( length1 == 1 ) + { + coords1 = QString::number( start1 + 1 ); + } + else + { + coords1 = QString::number( start1 + 1 ) + QString( "," ) + QString::number( length1 ); + } + if ( length2 == 0 ) + { + coords2 = QString::number( start2 ) + QString( ",0" ); + } + else if ( length2 == 1 ) + { + coords2 = QString::number( start2 + 1 ); + } + else + { + coords2 = QString::number( start2 + 1 ) + QString( "," ) + QString::number( length2 ); + } + QString text; + text = QString( "@@ -" ) + coords1 + QString( " +" ) + coords2 + QString( " @@\n" ); + // Escape the body of the patch with %xx notation. 
+ foreach( Diff aDiff, diffs ) + { + switch ( aDiff.operation ) + { + case INSERT: + text += QString( '+' ); + break; + case DELETE: + text += QString( '-' ); + break; + case EQUAL: + text += QString( ' ' ); + break; + } + text += QString( QUrl::toPercentEncoding( aDiff.text, " !~*'();/?:@&=+$,#" ) ) + QString( "\n" ); + } + return text; +} ///////////////////////////////////////////// // @@ -162,870 +175,974 @@ QString Patch::toString() { ///////////////////////////////////////////// diff_match_patch::diff_match_patch() : - Diff_Timeout(1.0f), - Diff_EditCost(4), - Match_Threshold(0.5f), - Match_Distance(1000), - Patch_DeleteThreshold(0.5f), - Patch_Margin(4), - Match_MaxBits(32) { + Diff_Timeout( 1.0f ), + Diff_EditCost( 4 ), + Match_Threshold( 0.5f ), + Match_Distance( 1000 ), + Patch_DeleteThreshold( 0.5f ), + Patch_Margin( 4 ), + Match_MaxBits( 32 ) +{ } - -QList diff_match_patch::diff_main(const QString &text1, - const QString &text2) { - return diff_main(text1, text2, true); +QList< Diff > diff_match_patch::diff_main( const QString &text1, const QString &text2 ) +{ + return diff_main( text1, text2, true ); } -QList diff_match_patch::diff_main(const QString &text1, - const QString &text2, bool checklines) { - // Set a deadline by which time the diff must be complete. - clock_t deadline; - if (Diff_Timeout <= 0) { - deadline = std::numeric_limits::max(); - } else { - deadline = clock() + (clock_t)(Diff_Timeout * CLOCKS_PER_SEC); - } - return diff_main(text1, text2, checklines, deadline); +QList< Diff > diff_match_patch::diff_main( const QString &text1, const QString &text2, bool checklines ) +{ + // Set a deadline by which time the diff must be complete. 
+ clock_t deadline; + if ( Diff_Timeout <= 0 ) + { + deadline = std::numeric_limits< clock_t >::max(); + } + else + { + deadline = clock() + (clock_t)( Diff_Timeout * CLOCKS_PER_SEC ); + } + return diff_main( text1, text2, checklines, deadline ); } -QList diff_match_patch::diff_main(const QString &text1, - const QString &text2, bool checklines, clock_t deadline) { - // Check for null inputs. - if (text1.isNull() || text2.isNull()) { - throw "Null inputs. (diff_main)"; - } +QList< Diff > diff_match_patch::diff_main( const QString &text1, const QString &text2, bool checklines, clock_t deadline ) +{ + // Check for null inputs. + if ( text1.isNull() || text2.isNull() ) + { + throw "Null inputs. (diff_main)"; + } + + // Check for equality (speedup). + QList< Diff > diffs; + if ( text1 == text2 ) + { + if ( !text1.isEmpty() ) + { + diffs.append( Diff( EQUAL, text1 ) ); + } + return diffs; + } - // Check for equality (speedup). - QList diffs; - if (text1 == text2) { - if (!text1.isEmpty()) { - diffs.append(Diff(EQUAL, text1)); + // Trim off common prefix (speedup). + int commonlength = diff_commonPrefix( text1, text2 ); + const QString &commonprefix = text1.left( commonlength ); + QString textChopped1 = text1.mid( commonlength ); + QString textChopped2 = text2.mid( commonlength ); + + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix( textChopped1, textChopped2 ); + const QString &commonsuffix = textChopped1.right( commonlength ); + textChopped1 = textChopped1.left( textChopped1.length() - commonlength ); + textChopped2 = textChopped2.left( textChopped2.length() - commonlength ); + + // Compute the diff on the middle block. + diffs = diff_compute( textChopped1, textChopped2, checklines, deadline ); + + // Restore the prefix and suffix. 
+ if ( !commonprefix.isEmpty() ) + { + diffs.prepend( Diff( EQUAL, commonprefix ) ); } + if ( !commonsuffix.isEmpty() ) + { + diffs.append( Diff( EQUAL, commonsuffix ) ); + } + + diff_cleanupMerge( diffs ); + return diffs; - } - - // Trim off common prefix (speedup). - int commonlength = diff_commonPrefix(text1, text2); - const QString &commonprefix = text1.left(commonlength); - QString textChopped1 = text1.mid(commonlength); - QString textChopped2 = text2.mid(commonlength); - - // Trim off common suffix (speedup). - commonlength = diff_commonSuffix(textChopped1, textChopped2); - const QString &commonsuffix = textChopped1.right(commonlength); - textChopped1 = textChopped1.left(textChopped1.length() - commonlength); - textChopped2 = textChopped2.left(textChopped2.length() - commonlength); - - // Compute the diff on the middle block. - diffs = diff_compute(textChopped1, textChopped2, checklines, deadline); - - // Restore the prefix and suffix. - if (!commonprefix.isEmpty()) { - diffs.prepend(Diff(EQUAL, commonprefix)); - } - if (!commonsuffix.isEmpty()) { - diffs.append(Diff(EQUAL, commonsuffix)); - } - - diff_cleanupMerge(diffs); - - return diffs; } +QList< Diff > diff_match_patch::diff_compute( QString text1, QString text2, bool checklines, clock_t deadline ) +{ + QList< Diff > diffs; -QList diff_match_patch::diff_compute(QString text1, QString text2, - bool checklines, clock_t deadline) { - QList diffs; + if ( text1.isEmpty() ) + { + // Just add some text (speedup). + diffs.append( Diff( INSERT, text2 ) ); + return diffs; + } - if (text1.isEmpty()) { - // Just add some text (speedup). - diffs.append(Diff(INSERT, text2)); - return diffs; - } + if ( text2.isEmpty() ) + { + // Just delete some text (speedup). + diffs.append( Diff( DELETE, text1 ) ); + return diffs; + } - if (text2.isEmpty()) { - // Just delete some text (speedup). - diffs.append(Diff(DELETE, text1)); - return diffs; - } + { + const QString longtext = text1.length() > text2.length() ? 
text1 : text2; + const QString shorttext = text1.length() > text2.length() ? text2 : text1; + const int i = longtext.indexOf( shorttext ); + if ( i != -1 ) + { + // Shorter text is inside the longer text (speedup). + const Operation op = ( text1.length() > text2.length() ) ? DELETE : INSERT; + diffs.append( Diff( op, longtext.left( i ) ) ); + diffs.append( Diff( EQUAL, shorttext ) ); + diffs.append( Diff( op, safeMid( longtext, i + shorttext.length() ) ) ); + return diffs; + } - { - const QString longtext = text1.length() > text2.length() ? text1 : text2; - const QString shorttext = text1.length() > text2.length() ? text2 : text1; - const int i = longtext.indexOf(shorttext); - if (i != -1) { - // Shorter text is inside the longer text (speedup). - const Operation op = (text1.length() > text2.length()) ? DELETE : INSERT; - diffs.append(Diff(op, longtext.left(i))); - diffs.append(Diff(EQUAL, shorttext)); - diffs.append(Diff(op, safeMid(longtext, i + shorttext.length()))); - return diffs; - } - - if (shorttext.length() == 1) { - // Single character string. - // After the previous speedup, the character can't be an equality. - diffs.append(Diff(DELETE, text1)); - diffs.append(Diff(INSERT, text2)); - return diffs; - } - // Garbage collect longtext and shorttext by scoping out. - } - - // Check to see if the problem can be split in two. - const QStringList hm = diff_halfMatch(text1, text2); - if (hm.count() > 0) { - // A half-match was found, sort out the return data. - const QString text1_a = hm[0]; - const QString text1_b = hm[1]; - const QString text2_a = hm[2]; - const QString text2_b = hm[3]; - const QString mid_common = hm[4]; - // Send both pairs off for separate processing. - const QList diffs_a = diff_main(text1_a, text2_a, - checklines, deadline); - const QList diffs_b = diff_main(text1_b, text2_b, - checklines, deadline); - // Merge the results. 
- diffs = diffs_a; - diffs.append(Diff(EQUAL, mid_common)); - diffs += diffs_b; - return diffs; - } + if ( shorttext.length() == 1 ) + { + // Single character string. + // After the previous speedup, the character can't be an equality. + diffs.append( Diff( DELETE, text1 ) ); + diffs.append( Diff( INSERT, text2 ) ); + return diffs; + } + // Garbage collect longtext and shorttext by scoping out. + } - // Perform a real diff. - if (checklines && text1.length() > 100 && text2.length() > 100) { - return diff_lineMode(text1, text2, deadline); - } + // Check to see if the problem can be split in two. + const QStringList hm = diff_halfMatch( text1, text2 ); + if ( hm.count() > 0 ) + { + // A half-match was found, sort out the return data. + const QString text1_a = hm[ 0 ]; + const QString text1_b = hm[ 1 ]; + const QString text2_a = hm[ 2 ]; + const QString text2_b = hm[ 3 ]; + const QString mid_common = hm[ 4 ]; + // Send both pairs off for separate processing. + const QList< Diff > diffs_a = diff_main( text1_a, text2_a, checklines, deadline ); + const QList< Diff > diffs_b = diff_main( text1_b, text2_b, checklines, deadline ); + // Merge the results. + diffs = diffs_a; + diffs.append( Diff( EQUAL, mid_common ) ); + diffs += diffs_b; + return diffs; + } - return diff_bisect(text1, text2, deadline); -} + // Perform a real diff. + if ( checklines && text1.length() > 100 && text2.length() > 100 ) + { + return diff_lineMode( text1, text2, deadline ); + } + return diff_bisect( text1, text2, deadline ); +} -QList diff_match_patch::diff_lineMode(QString text1, QString text2, - clock_t deadline) { - // Scan the text on a line-by-line basis first. - const QList b = diff_linesToChars(text1, text2); - text1 = b[0].toString(); - text2 = b[1].toString(); - QStringList linearray = b[2].toStringList(); - - QList diffs = diff_main(text1, text2, false, deadline); - - // Convert the diff back to original text. - diff_charsToLines(diffs, linearray); - // Eliminate freak matches (e.g. 
blank lines) - diff_cleanupSemantic(diffs); - - // Rediff any replacement blocks, this time character-by-character. - // Add a dummy entry at the end. - diffs.append(Diff(EQUAL, "")); - int count_delete = 0; - int count_insert = 0; - QString text_delete = ""; - QString text_insert = ""; - - QMutableListIterator pointer(diffs); - Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - while (thisDiff != NULL) { - switch (thisDiff->operation) { - case INSERT: - count_insert++; - text_insert += thisDiff->text; - break; - case DELETE: - count_delete++; - text_delete += thisDiff->text; - break; - case EQUAL: - // Upon reaching an equality, check for prior redundancies. - if (count_delete >= 1 && count_insert >= 1) { - // Delete the offending records and add the merged ones. - pointer.previous(); - for (int j = 0; j < count_delete + count_insert; j++) { - pointer.previous(); - pointer.remove(); - } - foreach(Diff newDiff, - diff_main(text_delete, text_insert, false, deadline)) { - pointer.insert(newDiff); - } +QList< Diff > diff_match_patch::diff_lineMode( QString text1, QString text2, clock_t deadline ) +{ + // Scan the text on a line-by-line basis first. + const QList< QVariant > b = diff_linesToChars( text1, text2 ); + text1 = b[ 0 ].toString(); + text2 = b[ 1 ].toString(); + QStringList linearray = b[ 2 ].toStringList(); + + QList< Diff > diffs = diff_main( text1, text2, false, deadline ); + + // Convert the diff back to original text. + diff_charsToLines( diffs, linearray ); + // Eliminate freak matches (e.g. blank lines) + diff_cleanupSemantic( diffs ); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.append( Diff( EQUAL, "" ) ); + int count_delete = 0; + int count_insert = 0; + QString text_delete = ""; + QString text_insert = ""; + + QMutableListIterator< Diff > pointer( diffs ); + Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; + while ( thisDiff != NULL ) + { + switch ( thisDiff->operation ) + { + case INSERT: + count_insert++; + text_insert += thisDiff->text; + break; + case DELETE: + count_delete++; + text_delete += thisDiff->text; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if ( count_delete >= 1 && count_insert >= 1 ) + { + // Delete the offending records and add the merged ones. + pointer.previous(); + for ( int j = 0; j < count_delete + count_insert; j++ ) + { + pointer.previous(); + pointer.remove(); + } + foreach( Diff newDiff, diff_main( text_delete, text_insert, false, deadline ) ) + { + pointer.insert( newDiff ); + } + } + count_insert = 0; + count_delete = 0; + text_delete = ""; + text_insert = ""; + break; } - count_insert = 0; - count_delete = 0; - text_delete = ""; - text_insert = ""; - break; + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - diffs.removeLast(); // Remove the dummy entry at the end. + diffs.removeLast(); // Remove the dummy entry at the end. - return diffs; + return diffs; } +QList< Diff > diff_match_patch::diff_bisect( const QString &text1, const QString &text2, clock_t deadline ) +{ + // Cache the text lengths to prevent multiple calls. + const int text1_length = text1.length(); + const int text2_length = text2.length(); + const int max_d = ( text1_length + text2_length + 1 ) / 2; + const int v_offset = max_d; + const int v_length = 2 * max_d; + int *v1 = new int[ v_length ]; + int *v2 = new int[ v_length ]; + for ( int x = 0; x < v_length; x++ ) + { + v1[ x ] = -1; + v2[ x ] = -1; + } + v1[ v_offset + 1 ] = 0; + v2[ v_offset + 1 ] = 0; + const int delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + const bool front = ( delta % 2 != 0 ); + // Offsets for start and end of k loop. 
+ // Prevents mapping of space beyond the grid. + int k1start = 0; + int k1end = 0; + int k2start = 0; + int k2end = 0; + for ( int d = 0; d < max_d; d++ ) + { + // Bail out if deadline is reached. + if ( clock() > deadline ) + { + break; + } -QList diff_match_patch::diff_bisect(const QString &text1, - const QString &text2, clock_t deadline) { - // Cache the text lengths to prevent multiple calls. - const int text1_length = text1.length(); - const int text2_length = text2.length(); - const int max_d = (text1_length + text2_length + 1) / 2; - const int v_offset = max_d; - const int v_length = 2 * max_d; - int *v1 = new int[v_length]; - int *v2 = new int[v_length]; - for (int x = 0; x < v_length; x++) { - v1[x] = -1; - v2[x] = -1; - } - v1[v_offset + 1] = 0; - v2[v_offset + 1] = 0; - const int delta = text1_length - text2_length; - // If the total number of characters is odd, then the front path will - // collide with the reverse path. - const bool front = (delta % 2 != 0); - // Offsets for start and end of k loop. - // Prevents mapping of space beyond the grid. - int k1start = 0; - int k1end = 0; - int k2start = 0; - int k2end = 0; - for (int d = 0; d < max_d; d++) { - // Bail out if deadline is reached. - if (clock() > deadline) { - break; - } - - // Walk the front path one step. - for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { - const int k1_offset = v_offset + k1; - int x1; - if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) { - x1 = v1[k1_offset + 1]; - } else { - x1 = v1[k1_offset - 1] + 1; - } - int y1 = x1 - k1; - while (x1 < text1_length && y1 < text2_length - && text1[x1] == text2[y1]) { - x1++; - y1++; - } - v1[k1_offset] = x1; - if (x1 > text1_length) { - // Ran off the right of the graph. - k1end += 2; - } else if (y1 > text2_length) { - // Ran off the bottom of the graph. 
- k1start += 2; - } else if (front) { - int k2_offset = v_offset + delta - k1; - if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { - // Mirror x2 onto top-left coordinate system. - int x2 = text1_length - v2[k2_offset]; - if (x1 >= x2) { - // Overlap detected. - delete [] v1; - delete [] v2; - return diff_bisectSplit(text1, text2, x1, y1, deadline); - } + // Walk the front path one step. + for ( int k1 = -d + k1start; k1 <= d - k1end; k1 += 2 ) + { + const int k1_offset = v_offset + k1; + int x1; + if ( k1 == -d || ( k1 != d && v1[ k1_offset - 1 ] < v1[ k1_offset + 1 ] ) ) + { + x1 = v1[ k1_offset + 1 ]; + } + else + { + x1 = v1[ k1_offset - 1 ] + 1; + } + int y1 = x1 - k1; + while ( x1 < text1_length && y1 < text2_length && text1[ x1 ] == text2[ y1 ] ) + { + x1++; + y1++; + } + v1[ k1_offset ] = x1; + if ( x1 > text1_length ) + { + // Ran off the right of the graph. + k1end += 2; + } + else if ( y1 > text2_length ) + { + // Ran off the bottom of the graph. + k1start += 2; + } + else if ( front ) + { + int k2_offset = v_offset + delta - k1; + if ( k2_offset >= 0 && k2_offset < v_length && v2[ k2_offset ] != -1 ) + { + // Mirror x2 onto top-left coordinate system. + int x2 = text1_length - v2[ k2_offset ]; + if ( x1 >= x2 ) + { + // Overlap detected. + delete[] v1; + delete[] v2; + return diff_bisectSplit( text1, text2, x1, y1, deadline ); + } + } + } } - } - } - - // Walk the reverse path one step. - for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { - const int k2_offset = v_offset + k2; - int x2; - if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) { - x2 = v2[k2_offset + 1]; - } else { - x2 = v2[k2_offset - 1] + 1; - } - int y2 = x2 - k2; - while (x2 < text1_length && y2 < text2_length - && text1[text1_length - x2 - 1] == text2[text2_length - y2 - 1]) { - x2++; - y2++; - } - v2[k2_offset] = x2; - if (x2 > text1_length) { - // Ran off the left of the graph. 
- k2end += 2; - } else if (y2 > text2_length) { - // Ran off the top of the graph. - k2start += 2; - } else if (!front) { - int k1_offset = v_offset + delta - k2; - if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { - int x1 = v1[k1_offset]; - int y1 = v_offset + x1 - k1_offset; - // Mirror x2 onto top-left coordinate system. - x2 = text1_length - x2; - if (x1 >= x2) { - // Overlap detected. - delete [] v1; - delete [] v2; - return diff_bisectSplit(text1, text2, x1, y1, deadline); - } + + // Walk the reverse path one step. + for ( int k2 = -d + k2start; k2 <= d - k2end; k2 += 2 ) + { + const int k2_offset = v_offset + k2; + int x2; + if ( k2 == -d || ( k2 != d && v2[ k2_offset - 1 ] < v2[ k2_offset + 1 ] ) ) + { + x2 = v2[ k2_offset + 1 ]; + } + else + { + x2 = v2[ k2_offset - 1 ] + 1; + } + int y2 = x2 - k2; + while ( x2 < text1_length && y2 < text2_length && text1[ text1_length - x2 - 1 ] == text2[ text2_length - y2 - 1 ] ) + { + x2++; + y2++; + } + v2[ k2_offset ] = x2; + if ( x2 > text1_length ) + { + // Ran off the left of the graph. + k2end += 2; + } + else if ( y2 > text2_length ) + { + // Ran off the top of the graph. + k2start += 2; + } + else if ( !front ) + { + int k1_offset = v_offset + delta - k2; + if ( k1_offset >= 0 && k1_offset < v_length && v1[ k1_offset ] != -1 ) + { + int x1 = v1[ k1_offset ]; + int y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2; + if ( x1 >= x2 ) + { + // Overlap detected. + delete[] v1; + delete[] v2; + return diff_bisectSplit( text1, text2, x1, y1, deadline ); + } + } + } } - } - } - } - delete [] v1; - delete [] v2; - // Diff took too long and hit the deadline or - // number of diffs equals number of characters, no commonality at all. 
- QList diffs; - diffs.append(Diff(DELETE, text1)); - diffs.append(Diff(INSERT, text2)); - return diffs; + } + delete[] v1; + delete[] v2; + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + QList< Diff > diffs; + diffs.append( Diff( DELETE, text1 ) ); + diffs.append( Diff( INSERT, text2 ) ); + return diffs; } -QList diff_match_patch::diff_bisectSplit(const QString &text1, - const QString &text2, int x, int y, clock_t deadline) { - QString text1a = text1.left(x); - QString text2a = text2.left(y); - QString text1b = safeMid(text1, x); - QString text2b = safeMid(text2, y); +QList< Diff > diff_match_patch::diff_bisectSplit( const QString &text1, const QString &text2, int x, int y, clock_t deadline ) +{ + QString text1a = text1.left( x ); + QString text2a = text2.left( y ); + QString text1b = safeMid( text1, x ); + QString text2b = safeMid( text2, y ); - // Compute both diffs serially. - QList diffs = diff_main(text1a, text2a, false, deadline); - QList diffsb = diff_main(text1b, text2b, false, deadline); + // Compute both diffs serially. + QList< Diff > diffs = diff_main( text1a, text2a, false, deadline ); + QList< Diff > diffsb = diff_main( text1b, text2b, false, deadline ); - return diffs + diffsb; + return diffs + diffsb; } -QList diff_match_patch::diff_linesToChars(const QString &text1, - const QString &text2) { - QStringList lineArray; - QMap lineHash; - // e.g. linearray[4] == "Hello\n" - // e.g. linehash.get("Hello\n") == 4 - - // "\x00" is a valid character, but various debuggers don't like it. - // So we'll insert a junk entry to avoid generating a null character. 
- lineArray.append(""); - - const QString chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); - const QString chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); - - QList listRet; - listRet.append(QVariant::fromValue(chars1)); - listRet.append(QVariant::fromValue(chars2)); - listRet.append(QVariant::fromValue(lineArray)); - return listRet; -} +QList< QVariant > diff_match_patch::diff_linesToChars( const QString &text1, const QString &text2 ) +{ + QStringList lineArray; + QMap< QString, int > lineHash; + // e.g. linearray[4] == "Hello\n" + // e.g. linehash.get("Hello\n") == 4 + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. + lineArray.append( "" ); -QString diff_match_patch::diff_linesToCharsMunge(const QString &text, - QStringList &lineArray, - QMap &lineHash) { - int lineStart = 0; - int lineEnd = -1; - QString line; - QString chars; - // Walk the text, pulling out a substring for each line. - // text.split('\n') would would temporarily double our memory footprint. - // Modifying text would create many large strings to garbage collect. 
- while (lineEnd < text.length() - 1) { - lineEnd = text.indexOf('\n', lineStart); - if (lineEnd == -1) { - lineEnd = text.length() - 1; - } - line = safeMid(text, lineStart, lineEnd + 1 - lineStart); - lineStart = lineEnd + 1; - - if (lineHash.contains(line)) { - chars += QChar(static_cast(lineHash.value(line))); - } else { - lineArray.append(line); - lineHash.insert(line, lineArray.size() - 1); - chars += QChar(static_cast(lineArray.size() - 1)); - } - } - return chars; -} + const QString chars1 = diff_linesToCharsMunge( text1, lineArray, lineHash ); + const QString chars2 = diff_linesToCharsMunge( text2, lineArray, lineHash ); + QList< QVariant > listRet; + listRet.append( QVariant::fromValue( chars1 ) ); + listRet.append( QVariant::fromValue( chars2 ) ); + listRet.append( QVariant::fromValue( lineArray ) ); + return listRet; +} +QString diff_match_patch::diff_linesToCharsMunge( const QString &text, QStringList &lineArray, QMap< QString, int > &lineHash ) +{ + int lineStart = 0; + int lineEnd = -1; + QString line; + QString chars; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + while ( lineEnd < text.length() - 1 ) + { + lineEnd = text.indexOf( '\n', lineStart ); + if ( lineEnd == -1 ) + { + lineEnd = text.length() - 1; + } + line = safeMid( text, lineStart, lineEnd + 1 - lineStart ); + lineStart = lineEnd + 1; -void diff_match_patch::diff_charsToLines(QList &diffs, - const QStringList &lineArray) { - // Qt has no mutable foreach construct. 
- QMutableListIterator i(diffs); - while (i.hasNext()) { - Diff &diff = i.next(); - QString text; - for (int y = 0; y < diff.text.length(); y++) { - text += lineArray.value(static_cast(diff.text[y].unicode())); + if ( lineHash.contains( line ) ) + { + chars += QChar( static_cast< ushort >( lineHash.value( line ) ) ); + } + else + { + lineArray.append( line ); + lineHash.insert( line, lineArray.size() - 1 ); + chars += QChar( static_cast< ushort >( lineArray.size() - 1 ) ); + } } - diff.text = text; - } + return chars; } - -int diff_match_patch::diff_commonPrefix(const QString &text1, - const QString &text2) { - // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - const int n = std::min(text1.length(), text2.length()); - for (int i = 0; i < n; i++) { - if (text1[i] != text2[i]) { - return i; +void diff_match_patch::diff_charsToLines( QList< Diff > &diffs, const QStringList &lineArray ) +{ + // Qt has no mutable foreach construct. + QMutableListIterator< Diff > i( diffs ); + while ( i.hasNext() ) + { + Diff &diff = i.next(); + QString text; + for ( int y = 0; y < diff.text.length(); y++ ) + { + text += lineArray.value( static_cast< ushort >( diff.text[ y ].unicode() ) ); + } + diff.text = text; } - } - return n; } - -int diff_match_patch::diff_commonSuffix(const QString &text1, - const QString &text2) { - // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - const int text1_length = text1.length(); - const int text2_length = text2.length(); - const int n = std::min(text1_length, text2_length); - for (int i = 1; i <= n; i++) { - if (text1[text1_length - i] != text2[text2_length - i]) { - return i - 1; +int diff_match_patch::diff_commonPrefix( const QString &text1, const QString &text2 ) +{ + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const int n = std::min( text1.length(), text2.length() ); + for ( int i = 0; i < n; i++ ) + { + if ( text1[ i ] != text2[ i ] ) + { + return i; + } } - } - return n; + return n; } -int 
diff_match_patch::diff_commonOverlap(const QString &text1, - const QString &text2) { - // Cache the text lengths to prevent multiple calls. - const int text1_length = text1.length(); - const int text2_length = text2.length(); - // Eliminate the null case. - if (text1_length == 0 || text2_length == 0) { - return 0; - } - // Truncate the longer string. - QString text1_trunc = text1; - QString text2_trunc = text2; - if (text1_length > text2_length) { - text1_trunc = text1.right(text2_length); - } else if (text1_length < text2_length) { - text2_trunc = text2.left(text1_length); - } - const int text_length = std::min(text1_length, text2_length); - // Quick check for the worst case. - if (text1_trunc == text2_trunc) { - return text_length; - } - - // Start by looking for a single character match - // and increase length until no match is found. - // Performance analysis: http://neil.fraser.name/news/2010/11/04/ - int best = 0; - int length = 1; - while (true) { - QString pattern = text1_trunc.right(length); - int found = text2_trunc.indexOf(pattern); - if (found == -1) { - return best; - } - length += found; - if (found == 0 || text1_trunc.right(length) == text2_trunc.left(length)) { - best = length; - length++; - } - } +int diff_match_patch::diff_commonSuffix( const QString &text1, const QString &text2 ) +{ + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const int text1_length = text1.length(); + const int text2_length = text2.length(); + const int n = std::min( text1_length, text2_length ); + for ( int i = 1; i <= n; i++ ) + { + if ( text1[ text1_length - i ] != text2[ text2_length - i ] ) + { + return i - 1; + } + } + return n; } -QStringList diff_match_patch::diff_halfMatch(const QString &text1, - const QString &text2) { - if (Diff_Timeout <= 0) { - // Don't risk returning a non-optimal diff if we have unlimited time. - return QStringList(); - } - const QString longtext = text1.length() > text2.length() ? 
text1 : text2; - const QString shorttext = text1.length() > text2.length() ? text2 : text1; - if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) { - return QStringList(); // Pointless. - } - - // First check if the second quarter is the seed for a half-match. - const QStringList hm1 = diff_halfMatchI(longtext, shorttext, - (longtext.length() + 3) / 4); - // Check again based on the third quarter. - const QStringList hm2 = diff_halfMatchI(longtext, shorttext, - (longtext.length() + 1) / 2); - QStringList hm; - if (hm1.isEmpty() && hm2.isEmpty()) { - return QStringList(); - } else if (hm2.isEmpty()) { - hm = hm1; - } else if (hm1.isEmpty()) { - hm = hm2; - } else { - // Both matched. Select the longest. - hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; - } - - // A half-match was found, sort out the return data. - if (text1.length() > text2.length()) { - return hm; - } else { - QStringList listRet; - listRet << hm[2] << hm[3] << hm[0] << hm[1] << hm[4]; - return listRet; - } +int diff_match_patch::diff_commonOverlap( const QString &text1, const QString &text2 ) +{ + // Cache the text lengths to prevent multiple calls. + const int text1_length = text1.length(); + const int text2_length = text2.length(); + // Eliminate the null case. + if ( text1_length == 0 || text2_length == 0 ) + { + return 0; + } + // Truncate the longer string. + QString text1_trunc = text1; + QString text2_trunc = text2; + if ( text1_length > text2_length ) + { + text1_trunc = text1.right( text2_length ); + } + else if ( text1_length < text2_length ) + { + text2_trunc = text2.left( text1_length ); + } + const int text_length = std::min( text1_length, text2_length ); + // Quick check for the worst case. + if ( text1_trunc == text2_trunc ) + { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. 
+ // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + int best = 0; + int length = 1; + while ( true ) + { + QString pattern = text1_trunc.right( length ); + int found = text2_trunc.indexOf( pattern ); + if ( found == -1 ) + { + return best; + } + length += found; + if ( found == 0 || text1_trunc.right( length ) == text2_trunc.left( length ) ) + { + best = length; + length++; + } + } } +QStringList diff_match_patch::diff_halfMatch( const QString &text1, const QString &text2 ) +{ + if ( Diff_Timeout <= 0 ) + { + // Don't risk returning a non-optimal diff if we have unlimited time. + return QStringList(); + } + const QString longtext = text1.length() > text2.length() ? text1 : text2; + const QString shorttext = text1.length() > text2.length() ? text2 : text1; + if ( longtext.length() < 4 || shorttext.length() * 2 < longtext.length() ) + { + return QStringList(); // Pointless. + } -QStringList diff_match_patch::diff_halfMatchI(const QString &longtext, - const QString &shorttext, - int i) { - // Start with a 1/4 length substring at position i as a seed. 
- const QString seed = safeMid(longtext, i, longtext.length() / 4); - int j = -1; - QString best_common; - QString best_longtext_a, best_longtext_b; - QString best_shorttext_a, best_shorttext_b; - while ((j = shorttext.indexOf(seed, j + 1)) != -1) { - const int prefixLength = diff_commonPrefix(safeMid(longtext, i), - safeMid(shorttext, j)); - const int suffixLength = diff_commonSuffix(longtext.left(i), - shorttext.left(j)); - if (best_common.length() < suffixLength + prefixLength) { - best_common = safeMid(shorttext, j - suffixLength, suffixLength) - + safeMid(shorttext, j, prefixLength); - best_longtext_a = longtext.left(i - suffixLength); - best_longtext_b = safeMid(longtext, i + prefixLength); - best_shorttext_a = shorttext.left(j - suffixLength); - best_shorttext_b = safeMid(shorttext, j + prefixLength); - } - } - if (best_common.length() * 2 >= longtext.length()) { - QStringList listRet; - listRet << best_longtext_a << best_longtext_b << best_shorttext_a - << best_shorttext_b << best_common; - return listRet; - } else { - return QStringList(); - } + // First check if the second quarter is the seed for a half-match. + const QStringList hm1 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 3 ) / 4 ); + // Check again based on the third quarter. + const QStringList hm2 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 1 ) / 2 ); + QStringList hm; + if ( hm1.isEmpty() && hm2.isEmpty() ) + { + return QStringList(); + } + else if ( hm2.isEmpty() ) + { + hm = hm1; + } + else if ( hm1.isEmpty() ) + { + hm = hm2; + } + else + { + // Both matched. Select the longest. + hm = hm1[ 4 ].length() > hm2[ 4 ].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. 
+ if ( text1.length() > text2.length() ) + { + return hm; + } + else + { + QStringList listRet; + listRet << hm[ 2 ] << hm[ 3 ] << hm[ 0 ] << hm[ 1 ] << hm[ 4 ]; + return listRet; + } } +QStringList diff_match_patch::diff_halfMatchI( const QString &longtext, const QString &shorttext, int i ) +{ + // Start with a 1/4 length substring at position i as a seed. + const QString seed = safeMid( longtext, i, longtext.length() / 4 ); + int j = -1; + QString best_common; + QString best_longtext_a, best_longtext_b; + QString best_shorttext_a, best_shorttext_b; + while ( ( j = shorttext.indexOf( seed, j + 1 ) ) != -1 ) + { + const int prefixLength = diff_commonPrefix( safeMid( longtext, i ), safeMid( shorttext, j ) ); + const int suffixLength = diff_commonSuffix( longtext.left( i ), shorttext.left( j ) ); + if ( best_common.length() < suffixLength + prefixLength ) + { + best_common = safeMid( shorttext, j - suffixLength, suffixLength ) + safeMid( shorttext, j, prefixLength ); + best_longtext_a = longtext.left( i - suffixLength ); + best_longtext_b = safeMid( longtext, i + prefixLength ); + best_shorttext_a = shorttext.left( j - suffixLength ); + best_shorttext_b = safeMid( shorttext, j + prefixLength ); + } + } + if ( best_common.length() * 2 >= longtext.length() ) + { + QStringList listRet; + listRet << best_longtext_a << best_longtext_b << best_shorttext_a << best_shorttext_b << best_common; + return listRet; + } + else + { + return QStringList(); + } +} -void diff_match_patch::diff_cleanupSemantic(QList &diffs) { - if (diffs.isEmpty()) { - return; - } - bool changes = false; - QStack equalities; // Stack of equalities. - QString lastequality; // Always equal to equalities.lastElement().text - QMutableListIterator pointer(diffs); - // Number of characters that changed prior to the equality. - int length_insertions1 = 0; - int length_deletions1 = 0; - // Number of characters that changed after the equality. 
- int length_insertions2 = 0; - int length_deletions2 = 0; - Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - while (thisDiff != NULL) { - if (thisDiff->operation == EQUAL) { - // Equality found. - equalities.push(*thisDiff); - length_insertions1 = length_insertions2; - length_deletions1 = length_deletions2; - length_insertions2 = 0; - length_deletions2 = 0; - lastequality = thisDiff->text; - } else { - // An insertion or deletion. - if (thisDiff->operation == INSERT) { - length_insertions2 += thisDiff->text.length(); - } else { - length_deletions2 += thisDiff->text.length(); - } - // Eliminate an equality that is smaller or equal to the edits on both - // sides of it. - if (!lastequality.isNull() - && (lastequality.length() - <= std::max(length_insertions1, length_deletions1)) - && (lastequality.length() - <= std::max(length_insertions2, length_deletions2))) { - // printf("Splitting: '%s'\n", qPrintable(lastequality)); - // Walk back to offending equality. - while (*thisDiff != equalities.top()) { - thisDiff = &pointer.previous(); +void diff_match_patch::diff_cleanupSemantic( QList< Diff > &diffs ) +{ + if ( diffs.isEmpty() ) + { + return; + } + bool changes = false; + QStack< Diff > equalities; // Stack of equalities. + QString lastequality; // Always equal to equalities.lastElement().text + QMutableListIterator< Diff > pointer( diffs ); + // Number of characters that changed prior to the equality. + int length_insertions1 = 0; + int length_deletions1 = 0; + // Number of characters that changed after the equality. + int length_insertions2 = 0; + int length_deletions2 = 0; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + while ( thisDiff != NULL ) + { + if ( thisDiff->operation == EQUAL ) + { + // Equality found. 
+ equalities.push( *thisDiff ); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = thisDiff->text; } - pointer.next(); + else + { + // An insertion or deletion. + if ( thisDiff->operation == INSERT ) + { + length_insertions2 += thisDiff->text.length(); + } + else + { + length_deletions2 += thisDiff->text.length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if ( !lastequality.isNull() && ( lastequality.length() <= std::max( length_insertions1, length_deletions1 ) ) && ( lastequality.length() <= std::max( length_insertions2, length_deletions2 ) ) ) + { + // printf("Splitting: '%s'\n", qPrintable(lastequality)); + // Walk back to offending equality. + while ( *thisDiff != equalities.top() ) + { + thisDiff = &pointer.previous(); + } + pointer.next(); - // Replace equality with a delete. - pointer.setValue(Diff(DELETE, lastequality)); - // Insert a corresponding an insert. - pointer.insert(Diff(INSERT, lastequality)); + // Replace equality with a delete. + pointer.setValue( Diff( DELETE, lastequality ) ); + // Insert a corresponding an insert. + pointer.insert( Diff( INSERT, lastequality ) ); - equalities.pop(); // Throw away the equality we just deleted. - if (!equalities.isEmpty()) { - // Throw away the previous equality (it needs to be reevaluated). - equalities.pop(); - } - if (equalities.isEmpty()) { - // There are no previous equalities, walk back to the start. - while (pointer.hasPrevious()) { - pointer.previous(); - } - } else { - // There is a safe equality we can fall back to. - thisDiff = &equalities.top(); - while (*thisDiff != pointer.previous()) { - // Intentionally empty loop. - } + equalities.pop(); // Throw away the equality we just deleted. + if ( !equalities.isEmpty() ) + { + // Throw away the previous equality (it needs to be reevaluated). 
+ equalities.pop(); + } + if ( equalities.isEmpty() ) + { + // There are no previous equalities, walk back to the start. + while ( pointer.hasPrevious() ) + { + pointer.previous(); + } + } + else + { + // There is a safe equality we can fall back to. + thisDiff = &equalities.top(); + while ( *thisDiff != pointer.previous() ) + { + // Intentionally empty loop. + } + } + + length_insertions1 = 0; // Reset the counters. + length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = QString(); + changes = true; + } } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } - length_insertions1 = 0; // Reset the counters. - length_deletions1 = 0; - length_insertions2 = 0; - length_deletions2 = 0; - lastequality = QString(); - changes = true; - } + // Normalize the diff. + if ( changes ) + { + diff_cleanupMerge( diffs ); } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - - // Normalize the diff. - if (changes) { - diff_cleanupMerge(diffs); - } - diff_cleanupSemanticLossless(diffs); - - // Find any overlaps between deletions and insertions. - // e.g: abcxxxxxxdef - // -> abcxxxdef - // e.g: xxxabcdefxxx - // -> defxxxabc - // Only extract an overlap if it is as big as the edit ahead or behind it. - pointer.toFront(); - Diff *prevDiff = NULL; - thisDiff = NULL; - if (pointer.hasNext()) { - prevDiff = &pointer.next(); - if (pointer.hasNext()) { - thisDiff = &pointer.next(); - } - } - while (thisDiff != NULL) { - if (prevDiff->operation == DELETE && - thisDiff->operation == INSERT) { - QString deletion = prevDiff->text; - QString insertion = thisDiff->text; - int overlap_length1 = diff_commonOverlap(deletion, insertion); - int overlap_length2 = diff_commonOverlap(insertion, deletion); - if (overlap_length1 >= overlap_length2) { - if (overlap_length1 >= deletion.length() / 2.0 || - overlap_length1 >= insertion.length() / 2.0) { - // Overlap found. Insert an equality and trim the surrounding edits. 
- pointer.previous(); - pointer.insert(Diff(EQUAL, insertion.left(overlap_length1))); - prevDiff->text = - deletion.left(deletion.length() - overlap_length1); - thisDiff->text = safeMid(insertion, overlap_length1); - // pointer.insert inserts the element before the cursor, so there is - // no need to step past the new element. + diff_cleanupSemanticLossless( diffs ); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. + pointer.toFront(); + Diff *prevDiff = NULL; + thisDiff = NULL; + if ( pointer.hasNext() ) + { + prevDiff = &pointer.next(); + if ( pointer.hasNext() ) + { + thisDiff = &pointer.next(); } - } else { - if (overlap_length2 >= deletion.length() / 2.0 || - overlap_length2 >= insertion.length() / 2.0) { - // Reverse overlap found. - // Insert an equality and swap and trim the surrounding edits. - pointer.previous(); - pointer.insert(Diff(EQUAL, deletion.left(overlap_length2))); - prevDiff->operation = INSERT; - prevDiff->text = - insertion.left(insertion.length() - overlap_length2); - thisDiff->operation = DELETE; - thisDiff->text = safeMid(deletion, overlap_length2); - // pointer.insert inserts the element before the cursor, so there is - // no need to step past the new element. + } + while ( thisDiff != NULL ) + { + if ( prevDiff->operation == DELETE && thisDiff->operation == INSERT ) + { + QString deletion = prevDiff->text; + QString insertion = thisDiff->text; + int overlap_length1 = diff_commonOverlap( deletion, insertion ); + int overlap_length2 = diff_commonOverlap( insertion, deletion ); + if ( overlap_length1 >= overlap_length2 ) + { + if ( overlap_length1 >= deletion.length() / 2.0 || overlap_length1 >= insertion.length() / 2.0 ) + { + // Overlap found. Insert an equality and trim the surrounding edits. 
+ pointer.previous(); + pointer.insert( Diff( EQUAL, insertion.left( overlap_length1 ) ) ); + prevDiff->text = deletion.left( deletion.length() - overlap_length1 ); + thisDiff->text = safeMid( insertion, overlap_length1 ); + // pointer.insert inserts the element before the cursor, so there is + // no need to step past the new element. + } + } + else + { + if ( overlap_length2 >= deletion.length() / 2.0 || overlap_length2 >= insertion.length() / 2.0 ) + { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. + pointer.previous(); + pointer.insert( Diff( EQUAL, deletion.left( overlap_length2 ) ) ); + prevDiff->operation = INSERT; + prevDiff->text = insertion.left( insertion.length() - overlap_length2 ); + thisDiff->operation = DELETE; + thisDiff->text = safeMid( deletion, overlap_length2 ); + // pointer.insert inserts the element before the cursor, so there is + // no need to step past the new element. + } + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; } - } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + prevDiff = thisDiff; + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; } - prevDiff = thisDiff; - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } } +void diff_match_patch::diff_cleanupSemanticLossless( QList< Diff > &diffs ) +{ + QString equality1, edit, equality2; + QString commonString; + int commonOffset; + int score, bestScore; + QString bestEquality1, bestEdit, bestEquality2; + // Create a new iterator at the start. + QMutableListIterator< Diff > pointer( diffs ); + Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + + // Intentionally ignore the first and last element (don't need checking). 
+ while ( nextDiff != NULL ) + { + if ( prevDiff->operation == EQUAL && nextDiff->operation == EQUAL ) + { + // This is a single edit surrounded by equalities. + equality1 = prevDiff->text; + edit = thisDiff->text; + equality2 = nextDiff->text; + + // First, shift the edit as far left as possible. + commonOffset = diff_commonSuffix( equality1, edit ); + if ( commonOffset != 0 ) + { + commonString = safeMid( edit, edit.length() - commonOffset ); + equality1 = equality1.left( equality1.length() - commonOffset ); + edit = commonString + edit.left( edit.length() - commonOffset ); + equality2 = commonString + equality2; + } -void diff_match_patch::diff_cleanupSemanticLossless(QList &diffs) { - QString equality1, edit, equality2; - QString commonString; - int commonOffset; - int score, bestScore; - QString bestEquality1, bestEdit, bestEquality2; - // Create a new iterator at the start. - QMutableListIterator pointer(diffs); - Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; - Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - - // Intentionally ignore the first and last element (don't need checking). - while (nextDiff != NULL) { - if (prevDiff->operation == EQUAL && - nextDiff->operation == EQUAL) { - // This is a single edit surrounded by equalities. - equality1 = prevDiff->text; - edit = thisDiff->text; - equality2 = nextDiff->text; - - // First, shift the edit as far left as possible. - commonOffset = diff_commonSuffix(equality1, edit); - if (commonOffset != 0) { - commonString = safeMid(edit, edit.length() - commonOffset); - equality1 = equality1.left(equality1.length() - commonOffset); - edit = commonString + edit.left(edit.length() - commonOffset); - equality2 = commonString + equality2; - } - - // Second, step character by character right, looking for the best fit. 
- bestEquality1 = equality1; - bestEdit = edit; - bestEquality2 = equality2; - bestScore = diff_cleanupSemanticScore(equality1, edit) - + diff_cleanupSemanticScore(edit, equality2); - while (!edit.isEmpty() && !equality2.isEmpty() - && edit[0] == equality2[0]) { - equality1 += edit[0]; - edit = safeMid(edit, 1) + equality2[0]; - equality2 = safeMid(equality2, 1); - score = diff_cleanupSemanticScore(equality1, edit) - + diff_cleanupSemanticScore(edit, equality2); - // The >= encourages trailing rather than leading whitespace on edits. - if (score >= bestScore) { - bestScore = score; + // Second, step character by character right, looking for the best fit. bestEquality1 = equality1; bestEdit = edit; bestEquality2 = equality2; - } - } + bestScore = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); + while ( !edit.isEmpty() && !equality2.isEmpty() && edit[ 0 ] == equality2[ 0 ] ) + { + equality1 += edit[ 0 ]; + edit = safeMid( edit, 1 ) + equality2[ 0 ]; + equality2 = safeMid( equality2, 1 ); + score = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); + // The >= encourages trailing rather than leading whitespace on edits. + if ( score >= bestScore ) + { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } - if (prevDiff->text != bestEquality1) { - // We have an improvement, save it back to the diff. - if (!bestEquality1.isEmpty()) { - prevDiff->text = bestEquality1; - } else { - pointer.previous(); // Walk past nextDiff. - pointer.previous(); // Walk past thisDiff. - pointer.previous(); // Walk past prevDiff. - pointer.remove(); // Delete prevDiff. - pointer.next(); // Walk past thisDiff. - pointer.next(); // Walk past nextDiff. - } - thisDiff->text = bestEdit; - if (!bestEquality2.isEmpty()) { - nextDiff->text = bestEquality2; - } else { - pointer.remove(); // Delete nextDiff. 
- nextDiff = thisDiff; - thisDiff = prevDiff; - } + if ( prevDiff->text != bestEquality1 ) + { + // We have an improvement, save it back to the diff. + if ( !bestEquality1.isEmpty() ) + { + prevDiff->text = bestEquality1; + } + else + { + pointer.previous(); // Walk past nextDiff. + pointer.previous(); // Walk past thisDiff. + pointer.previous(); // Walk past prevDiff. + pointer.remove(); // Delete prevDiff. + pointer.next(); // Walk past thisDiff. + pointer.next(); // Walk past nextDiff. + } + thisDiff->text = bestEdit; + if ( !bestEquality2.isEmpty() ) + { + nextDiff->text = bestEquality2; + } + else + { + pointer.remove(); // Delete nextDiff. + nextDiff = thisDiff; + thisDiff = prevDiff; + } + } } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; } - prevDiff = thisDiff; - thisDiff = nextDiff; - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - } } +int diff_match_patch::diff_cleanupSemanticScore( const QString &one, const QString &two ) +{ + if ( one.isEmpty() || two.isEmpty() ) + { + // Edges are the best. + return 6; + } -int diff_match_patch::diff_cleanupSemanticScore(const QString &one, - const QString &two) { - if (one.isEmpty() || two.isEmpty()) { - // Edges are the best. - return 6; - } - - // Each port of this function behaves slightly differently due to - // subtle differences in each language's definition of things like - // 'whitespace'. Since this function's purpose is largely cosmetic, - // the choice has been made to use each language's native features - // rather than force total conformity. 
- QChar char1 = one[one.length() - 1]; - QChar char2 = two[0]; - bool nonAlphaNumeric1 = !char1.isLetterOrNumber(); - bool nonAlphaNumeric2 = !char2.isLetterOrNumber(); - bool whitespace1 = nonAlphaNumeric1 && char1.isSpace(); - bool whitespace2 = nonAlphaNumeric2 && char2.isSpace(); - bool lineBreak1 = whitespace1 && char1.category() == QChar::Other_Control; - bool lineBreak2 = whitespace2 && char2.category() == QChar::Other_Control; - bool blankLine1 = lineBreak1 && BLANKLINEEND.indexIn(one) != -1; - bool blankLine2 = lineBreak2 && BLANKLINESTART.indexIn(two) != -1; - - if (blankLine1 || blankLine2) { - // Five points for blank lines. - return 5; - } else if (lineBreak1 || lineBreak2) { - // Four points for line breaks. - return 4; - } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { - // Three points for end of sentences. - return 3; - } else if (whitespace1 || whitespace2) { - // Two points for whitespace. - return 2; - } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { - // One point for non-alphanumeric. - return 1; - } - return 0; + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. 
+ QChar char1 = one[ one.length() - 1 ]; + QChar char2 = two[ 0 ]; + bool nonAlphaNumeric1 = !char1.isLetterOrNumber(); + bool nonAlphaNumeric2 = !char2.isLetterOrNumber(); + bool whitespace1 = nonAlphaNumeric1 && char1.isSpace(); + bool whitespace2 = nonAlphaNumeric2 && char2.isSpace(); + bool lineBreak1 = whitespace1 && char1.category() == QChar::Other_Control; + bool lineBreak2 = whitespace2 && char2.category() == QChar::Other_Control; + bool blankLine1 = lineBreak1 && BLANKLINEEND.indexIn( one ) != -1; + bool blankLine2 = lineBreak2 && BLANKLINESTART.indexIn( two ) != -1; + + if ( blankLine1 || blankLine2 ) + { + // Five points for blank lines. + return 5; + } + else if ( lineBreak1 || lineBreak2 ) + { + // Four points for line breaks. + return 4; + } + else if ( nonAlphaNumeric1 && !whitespace1 && whitespace2 ) + { + // Three points for end of sentences. + return 3; + } + else if ( whitespace1 || whitespace2 ) + { + // Two points for whitespace. + return 2; + } + else if ( nonAlphaNumeric1 || nonAlphaNumeric2 ) + { + // One point for non-alphanumeric. + return 1; + } + return 0; } - // Define some regex patterns for matching boundaries. -QRegExp diff_match_patch::BLANKLINEEND = QRegExp("\\n\\r?\\n$"); -QRegExp diff_match_patch::BLANKLINESTART = QRegExp("^\\r?\\n\\r?\\n"); - - -void diff_match_patch::diff_cleanupEfficiency(QList &diffs) { - if (diffs.isEmpty()) { - return; - } - bool changes = false; - QStack equalities; // Stack of equalities. - QString lastequality; // Always equal to equalities.lastElement().text - QMutableListIterator pointer(diffs); - // Is there an insertion operation before the last equality. - bool pre_ins = false; - // Is there a deletion operation before the last equality. - bool pre_del = false; - // Is there an insertion operation after the last equality. - bool post_ins = false; - // Is there a deletion operation after the last equality. - bool post_del = false; - - Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; - Diff *safeDiff = thisDiff; - - while (thisDiff != NULL) { - if (thisDiff->operation == EQUAL) { - // Equality found. - if (thisDiff->text.length() < Diff_EditCost && (post_ins || post_del)) { - // Candidate found. - equalities.push(*thisDiff); - pre_ins = post_ins; - pre_del = post_del; - lastequality = thisDiff->text; - } else { - // Not a candidate, and can never become one. - equalities.clear(); - lastequality = QString(); - safeDiff = thisDiff; - } - post_ins = post_del = false; - } else { - // An insertion or deletion. - if (thisDiff->operation == DELETE) { - post_del = true; - } else { - post_ins = true; - } - /* +QRegExp diff_match_patch::BLANKLINEEND = QRegExp( "\\n\\r?\\n$" ); +QRegExp diff_match_patch::BLANKLINESTART = QRegExp( "^\\r?\\n\\r?\\n" ); + +void diff_match_patch::diff_cleanupEfficiency( QList< Diff > &diffs ) +{ + if ( diffs.isEmpty() ) + { + return; + } + bool changes = false; + QStack< Diff > equalities; // Stack of equalities. + QString lastequality; // Always equal to equalities.lastElement().text + QMutableListIterator< Diff > pointer( diffs ); + // Is there an insertion operation before the last equality. + bool pre_ins = false; + // Is there a deletion operation before the last equality. + bool pre_del = false; + // Is there an insertion operation after the last equality. + bool post_ins = false; + // Is there a deletion operation after the last equality. + bool post_del = false; + + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *safeDiff = thisDiff; + + while ( thisDiff != NULL ) + { + if ( thisDiff->operation == EQUAL ) + { + // Equality found. + if ( thisDiff->text.length() < Diff_EditCost && ( post_ins || post_del ) ) + { + // Candidate found. + equalities.push( *thisDiff ); + pre_ins = post_ins; + pre_del = post_del; + lastequality = thisDiff->text; + } + else + { + // Not a candidate, and can never become one. 
+ equalities.clear(); + lastequality = QString(); + safeDiff = thisDiff; + } + post_ins = post_del = false; + } + else + { + // An insertion or deletion. + if ( thisDiff->operation == DELETE ) + { + post_del = true; + } + else + { + post_ins = true; + } + /* * Five types to be split: * ABXYCD * AXCD @@ -1033,1073 +1150,1205 @@ void diff_match_patch::diff_cleanupEfficiency(QList &diffs) { * AXCD * ABXC */ - if (!lastequality.isNull() - && ((pre_ins && pre_del && post_ins && post_del) - || ((lastequality.length() < Diff_EditCost / 2) - && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) - + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) { - // printf("Splitting: '%s'\n", qPrintable(lastequality)); - // Walk back to offending equality. - while (*thisDiff != equalities.top()) { - thisDiff = &pointer.previous(); - } - pointer.next(); - - // Replace equality with a delete. - pointer.setValue(Diff(DELETE, lastequality)); - // Insert a corresponding an insert. - pointer.insert(Diff(INSERT, lastequality)); - thisDiff = &pointer.previous(); - pointer.next(); - - equalities.pop(); // Throw away the equality we just deleted. - lastequality = QString(); - if (pre_ins && pre_del) { - // No changes made which could affect previous entry, keep going. - post_ins = post_del = true; - equalities.clear(); - safeDiff = thisDiff; - } else { - if (!equalities.isEmpty()) { - // Throw away the previous equality (it needs to be reevaluated). - equalities.pop(); - } - if (equalities.isEmpty()) { - // There are no previous questionable equalities, - // walk back to the last known safe diff. - thisDiff = safeDiff; - } else { - // There is an equality we can fall back to. - thisDiff = &equalities.top(); - } - while (*thisDiff != pointer.previous()) { - // Intentionally empty loop. - } - post_ins = post_del = false; - } + if ( !lastequality.isNull() && ( ( pre_ins && pre_del && post_ins && post_del ) || ( ( lastequality.length() < Diff_EditCost / 2 ) && ( ( pre_ins ? 1 : 0 ) + ( pre_del ? 
1 : 0 ) + ( post_ins ? 1 : 0 ) + ( post_del ? 1 : 0 ) ) == 3 ) ) ) + { + // printf("Splitting: '%s'\n", qPrintable(lastequality)); + // Walk back to offending equality. + while ( *thisDiff != equalities.top() ) + { + thisDiff = &pointer.previous(); + } + pointer.next(); + + // Replace equality with a delete. + pointer.setValue( Diff( DELETE, lastequality ) ); + // Insert a corresponding an insert. + pointer.insert( Diff( INSERT, lastequality ) ); + thisDiff = &pointer.previous(); + pointer.next(); - changes = true; - } + equalities.pop(); // Throw away the equality we just deleted. + lastequality = QString(); + if ( pre_ins && pre_del ) + { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalities.clear(); + safeDiff = thisDiff; + } + else + { + if ( !equalities.isEmpty() ) + { + // Throw away the previous equality (it needs to be reevaluated). + equalities.pop(); + } + if ( equalities.isEmpty() ) + { + // There are no previous questionable equalities, + // walk back to the last known safe diff. + thisDiff = safeDiff; + } + else + { + // There is an equality we can fall back to. + thisDiff = &equalities.top(); + } + while ( *thisDiff != pointer.previous() ) + { + // Intentionally empty loop. + } + post_ins = post_del = false; + } + + changes = true; + } + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - if (changes) { - diff_cleanupMerge(diffs); - } + if ( changes ) + { + diff_cleanupMerge( diffs ); + } } - -void diff_match_patch::diff_cleanupMerge(QList &diffs) { - diffs.append(Diff(EQUAL, "")); // Add a dummy entry at the end. - QMutableListIterator pointer(diffs); - int count_delete = 0; - int count_insert = 0; - QString text_delete = ""; - QString text_insert = ""; - Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; - Diff *prevEqual = NULL; - int commonlength; - while (thisDiff != NULL) { - switch (thisDiff->operation) { - case INSERT: - count_insert++; - text_insert += thisDiff->text; - prevEqual = NULL; - break; - case DELETE: - count_delete++; - text_delete += thisDiff->text; - prevEqual = NULL; - break; - case EQUAL: - if (count_delete + count_insert > 1) { - bool both_types = count_delete != 0 && count_insert != 0; - // Delete the offending records. - pointer.previous(); // Reverse direction. - while (count_delete-- > 0) { - pointer.previous(); - pointer.remove(); - } - while (count_insert-- > 0) { - pointer.previous(); - pointer.remove(); - } - if (both_types) { - // Factor out any common prefixies. - commonlength = diff_commonPrefix(text_insert, text_delete); - if (commonlength != 0) { - if (pointer.hasPrevious()) { - thisDiff = &pointer.previous(); - if (thisDiff->operation != EQUAL) { - throw "Previous diff should have been an equality."; +void diff_match_patch::diff_cleanupMerge( QList< Diff > &diffs ) +{ + diffs.append( Diff( EQUAL, "" ) ); // Add a dummy entry at the end. + QMutableListIterator< Diff > pointer( diffs ); + int count_delete = 0; + int count_insert = 0; + QString text_delete = ""; + QString text_insert = ""; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *prevEqual = NULL; + int commonlength; + while ( thisDiff != NULL ) + { + switch ( thisDiff->operation ) + { + case INSERT: + count_insert++; + text_insert += thisDiff->text; + prevEqual = NULL; + break; + case DELETE: + count_delete++; + text_delete += thisDiff->text; + prevEqual = NULL; + break; + case EQUAL: + if ( count_delete + count_insert > 1 ) + { + bool both_types = count_delete != 0 && count_insert != 0; + // Delete the offending records. + pointer.previous(); // Reverse direction. 
+ while ( count_delete-- > 0 ) + { + pointer.previous(); + pointer.remove(); + } + while ( count_insert-- > 0 ) + { + pointer.previous(); + pointer.remove(); + } + if ( both_types ) + { + // Factor out any common prefixies. + commonlength = diff_commonPrefix( text_insert, text_delete ); + if ( commonlength != 0 ) + { + if ( pointer.hasPrevious() ) + { + thisDiff = &pointer.previous(); + if ( thisDiff->operation != EQUAL ) + { + throw "Previous diff should have been an equality."; + } + thisDiff->text += text_insert.left( commonlength ); + pointer.next(); + } + else + { + pointer.insert( Diff( EQUAL, text_insert.left( commonlength ) ) ); + } + text_insert = safeMid( text_insert, commonlength ); + text_delete = safeMid( text_delete, commonlength ); + } + // Factor out any common suffixies. + commonlength = diff_commonSuffix( text_insert, text_delete ); + if ( commonlength != 0 ) + { + thisDiff = &pointer.next(); + thisDiff->text = safeMid( text_insert, text_insert.length() - commonlength ) + thisDiff->text; + text_insert = text_insert.left( text_insert.length() - commonlength ); + text_delete = text_delete.left( text_delete.length() - commonlength ); + pointer.previous(); + } + } + // Insert the merged records. + if ( !text_delete.isEmpty() ) + { + pointer.insert( Diff( DELETE, text_delete ) ); + } + if ( !text_insert.isEmpty() ) + { + pointer.insert( Diff( INSERT, text_insert ) ); + } + // Step forward to the equality. + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; } - thisDiff->text += text_insert.left(commonlength); - pointer.next(); - } else { - pointer.insert(Diff(EQUAL, text_insert.left(commonlength))); - } - text_insert = safeMid(text_insert, commonlength); - text_delete = safeMid(text_delete, commonlength); - } - // Factor out any common suffixies. 
- commonlength = diff_commonSuffix(text_insert, text_delete); - if (commonlength != 0) { - thisDiff = &pointer.next(); - thisDiff->text = safeMid(text_insert, text_insert.length() - - commonlength) + thisDiff->text; - text_insert = text_insert.left(text_insert.length() - - commonlength); - text_delete = text_delete.left(text_delete.length() - - commonlength); - pointer.previous(); - } - } - // Insert the merged records. - if (!text_delete.isEmpty()) { - pointer.insert(Diff(DELETE, text_delete)); - } - if (!text_insert.isEmpty()) { - pointer.insert(Diff(INSERT, text_insert)); - } - // Step forward to the equality. - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - - } else if (prevEqual != NULL) { - // Merge this equality with the previous one. - prevEqual->text += thisDiff->text; - pointer.remove(); - thisDiff = &pointer.previous(); - pointer.next(); // Forward direction + else if ( prevEqual != NULL ) + { + // Merge this equality with the previous one. + prevEqual->text += thisDiff->text; + pointer.remove(); + thisDiff = &pointer.previous(); + pointer.next(); // Forward direction + } + count_insert = 0; + count_delete = 0; + text_delete = ""; + text_insert = ""; + prevEqual = thisDiff; + break; } - count_insert = 0; - count_delete = 0; - text_delete = ""; - text_insert = ""; - prevEqual = thisDiff; - break; - } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - if (diffs.back().text.isEmpty()) { - diffs.removeLast(); // Remove the dummy entry at the end. - } - - /* + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + if ( diffs.back().text.isEmpty() ) + { + diffs.removeLast(); // Remove the dummy entry at the end. + } + + /* * Second pass: look for single edits surrounded on both sides by equalities * which can be shifted sideways to eliminate an equality. * e.g: ABAC -> ABAC */ - bool changes = false; - // Create a new iterator at the start. - // (As opposed to walking the current one back.) 
- pointer.toFront(); - Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - - // Intentionally ignore the first and last element (don't need checking). - while (nextDiff != NULL) { - if (prevDiff->operation == EQUAL && - nextDiff->operation == EQUAL) { - // This is a single edit surrounded by equalities. - if (thisDiff->text.endsWith(prevDiff->text)) { - // Shift the edit over the previous equality. - thisDiff->text = prevDiff->text - + thisDiff->text.left(thisDiff->text.length() - - prevDiff->text.length()); - nextDiff->text = prevDiff->text + nextDiff->text; - pointer.previous(); // Walk past nextDiff. - pointer.previous(); // Walk past thisDiff. - pointer.previous(); // Walk past prevDiff. - pointer.remove(); // Delete prevDiff. - pointer.next(); // Walk past thisDiff. - thisDiff = &pointer.next(); // Walk past nextDiff. - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - changes = true; - } else if (thisDiff->text.startsWith(nextDiff->text)) { - // Shift the edit over the next equality. - prevDiff->text += nextDiff->text; - thisDiff->text = safeMid(thisDiff->text, nextDiff->text.length()) - + nextDiff->text; - pointer.remove(); // Delete nextDiff. - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - changes = true; + bool changes = false; + // Create a new iterator at the start. + // (As opposed to walking the current one back.) + pointer.toFront(); + Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + + // Intentionally ignore the first and last element (don't need checking). + while ( nextDiff != NULL ) + { + if ( prevDiff->operation == EQUAL && nextDiff->operation == EQUAL ) + { + // This is a single edit surrounded by equalities. 
+ if ( thisDiff->text.endsWith( prevDiff->text ) ) + { + // Shift the edit over the previous equality. + thisDiff->text = prevDiff->text + thisDiff->text.left( thisDiff->text.length() - prevDiff->text.length() ); + nextDiff->text = prevDiff->text + nextDiff->text; + pointer.previous(); // Walk past nextDiff. + pointer.previous(); // Walk past thisDiff. + pointer.previous(); // Walk past prevDiff. + pointer.remove(); // Delete prevDiff. + pointer.next(); // Walk past thisDiff. + thisDiff = &pointer.next(); // Walk past nextDiff. + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + changes = true; + } + else if ( thisDiff->text.startsWith( nextDiff->text ) ) + { + // Shift the edit over the next equality. + prevDiff->text += nextDiff->text; + thisDiff->text = safeMid( thisDiff->text, nextDiff->text.length() ) + nextDiff->text; + pointer.remove(); // Delete nextDiff. + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + changes = true; + } } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if ( changes ) + { + diff_cleanupMerge( diffs ); } - prevDiff = thisDiff; - thisDiff = nextDiff; - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - // If shifts were made, the diff needs reordering and another shift sweep. - if (changes) { - diff_cleanupMerge(diffs); - } } - -int diff_match_patch::diff_xIndex(const QList &diffs, int loc) { - int chars1 = 0; - int chars2 = 0; - int last_chars1 = 0; - int last_chars2 = 0; - Diff lastDiff; - foreach(Diff aDiff, diffs) { - if (aDiff.operation != INSERT) { - // Equality or deletion. - chars1 += aDiff.text.length(); - } - if (aDiff.operation != DELETE) { - // Equality or insertion. - chars2 += aDiff.text.length(); - } - if (chars1 > loc) { - // Overshot the location. 
- lastDiff = aDiff; - break; - } - last_chars1 = chars1; - last_chars2 = chars2; - } - if (lastDiff.operation == DELETE) { - // The location was deleted. - return last_chars2; - } - // Add the remaining character length. - return last_chars2 + (loc - last_chars1); +int diff_match_patch::diff_xIndex( const QList< Diff > &diffs, int loc ) +{ + int chars1 = 0; + int chars2 = 0; + int last_chars1 = 0; + int last_chars2 = 0; + Diff lastDiff; + foreach( Diff aDiff, diffs ) + { + if ( aDiff.operation != INSERT ) + { + // Equality or deletion. + chars1 += aDiff.text.length(); + } + if ( aDiff.operation != DELETE ) + { + // Equality or insertion. + chars2 += aDiff.text.length(); + } + if ( chars1 > loc ) + { + // Overshot the location. + lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if ( lastDiff.operation == DELETE ) + { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + ( loc - last_chars1 ); } - -QString diff_match_patch::diff_prettyHtml(const QList &diffs) { - QString html; - QString text; - foreach(Diff aDiff, diffs) { - text = aDiff.text; - text.replace("&", "&").replace("<", "<") - .replace(">", ">").replace("\n", "¶
"); - switch (aDiff.operation) { - case INSERT: - html += QString("") + text - + QString(""); - break; - case DELETE: - html += QString("") + text - + QString(""); - break; - case EQUAL: - html += QString("") + text + QString(""); - break; - } - } - return html; +QString diff_match_patch::diff_prettyHtml( const QList< Diff > &diffs ) +{ + QString html; + QString text; + foreach( Diff aDiff, diffs ) + { + text = aDiff.text; + text.replace( "&", "&" ).replace( "<", "<" ).replace( ">", ">" ).replace( "\n", "¶
" ); + switch ( aDiff.operation ) + { + case INSERT: + html += QString( "" ) + text + QString( "" ); + break; + case DELETE: + html += QString( "" ) + text + QString( "" ); + break; + case EQUAL: + html += QString( "" ) + text + QString( "" ); + break; + } + } + return html; } - -QString diff_match_patch::diff_text1(const QList &diffs) { - QString text; - foreach(Diff aDiff, diffs) { - if (aDiff.operation != INSERT) { - text += aDiff.text; +QString diff_match_patch::diff_text1( const QList< Diff > &diffs ) +{ + QString text; + foreach( Diff aDiff, diffs ) + { + if ( aDiff.operation != INSERT ) + { + text += aDiff.text; + } } - } - return text; + return text; } - -QString diff_match_patch::diff_text2(const QList &diffs) { - QString text; - foreach(Diff aDiff, diffs) { - if (aDiff.operation != DELETE) { - text += aDiff.text; +QString diff_match_patch::diff_text2( const QList< Diff > &diffs ) +{ + QString text; + foreach( Diff aDiff, diffs ) + { + if ( aDiff.operation != DELETE ) + { + text += aDiff.text; + } } - } - return text; + return text; } - -int diff_match_patch::diff_levenshtein(const QList &diffs) { - int levenshtein = 0; - int insertions = 0; - int deletions = 0; - foreach(Diff aDiff, diffs) { - switch (aDiff.operation) { - case INSERT: - insertions += aDiff.text.length(); - break; - case DELETE: - deletions += aDiff.text.length(); - break; - case EQUAL: - // A deletion and an insertion is one substitution. 
- levenshtein += std::max(insertions, deletions); - insertions = 0; - deletions = 0; - break; - } - } - levenshtein += std::max(insertions, deletions); - return levenshtein; +int diff_match_patch::diff_levenshtein( const QList< Diff > &diffs ) +{ + int levenshtein = 0; + int insertions = 0; + int deletions = 0; + foreach( Diff aDiff, diffs ) + { + switch ( aDiff.operation ) + { + case INSERT: + insertions += aDiff.text.length(); + break; + case DELETE: + deletions += aDiff.text.length(); + break; + case EQUAL: + // A deletion and an insertion is one substitution. + levenshtein += std::max( insertions, deletions ); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += std::max( insertions, deletions ); + return levenshtein; } - -QString diff_match_patch::diff_toDelta(const QList &diffs) { - QString text; - foreach(Diff aDiff, diffs) { - switch (aDiff.operation) { - case INSERT: { - QString encoded = QString(QUrl::toPercentEncoding(aDiff.text, - " !~*'();/?:@&=+$,#")); - text += QString("+") + encoded + QString("\t"); - break; - } - case DELETE: - text += QString("-") + QString::number(aDiff.text.length()) - + QString("\t"); - break; - case EQUAL: - text += QString("=") + QString::number(aDiff.text.length()) - + QString("\t"); - break; - } - } - if (!text.isEmpty()) { - // Strip off trailing tab character. 
- text = text.left(text.length() - 1); - } - return text; +QString diff_match_patch::diff_toDelta( const QList< Diff > &diffs ) +{ + QString text; + foreach( Diff aDiff, diffs ) + { + switch ( aDiff.operation ) + { + case INSERT: + { + QString encoded = QString( QUrl::toPercentEncoding( aDiff.text, " !~*'();/?:@&=+$,#" ) ); + text += QString( "+" ) + encoded + QString( "\t" ); + break; + } + case DELETE: + text += QString( "-" ) + QString::number( aDiff.text.length() ) + QString( "\t" ); + break; + case EQUAL: + text += QString( "=" ) + QString::number( aDiff.text.length() ) + QString( "\t" ); + break; + } + } + if ( !text.isEmpty() ) + { + // Strip off trailing tab character. + text = text.left( text.length() - 1 ); + } + return text; } - -QList diff_match_patch::diff_fromDelta(const QString &text1, - const QString &delta) { - QList diffs; - int pointer = 0; // Cursor in text1 - QStringList tokens = delta.split("\t"); - foreach(QString token, tokens) { - if (token.isEmpty()) { - // Blank tokens are ok (from a trailing \t). - continue; - } - // Each token begins with a one character parameter which specifies the - // operation of this token (delete, insert, equality). - QString param = safeMid(token, 1); - switch (token[0].toAscii()) { - case '+': - param = QUrl::fromPercentEncoding(qPrintable(param)); - diffs.append(Diff(INSERT, param)); - break; - case '-': - // Fall through. - case '=': { - int n; - n = param.toInt(); - if (n < 0) { - throw QString("Negative number in diff_fromDelta: %1").arg(param); +QList< Diff > diff_match_patch::diff_fromDelta( const QString &text1, const QString &delta ) +{ + QList< Diff > diffs; + int pointer = 0; // Cursor in text1 + QStringList tokens = delta.split( "\t" ); + foreach( QString token, tokens ) + { + if ( token.isEmpty() ) + { + // Blank tokens are ok (from a trailing \t). 
+ continue; } - QString text; - text = safeMid(text1, pointer, n); - pointer += n; - if (token[0] == QChar('=')) { - diffs.append(Diff(EQUAL, text)); - } else { - diffs.append(Diff(DELETE, text)); + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + QString param = safeMid( token, 1 ); + switch ( token[ 0 ].toAscii() ) + { + case '+': + param = QUrl::fromPercentEncoding( qPrintable( param ) ); + diffs.append( Diff( INSERT, param ) ); + break; + case '-': + // Fall through. + case '=': + { + int n; + n = param.toInt(); + if ( n < 0 ) + { + throw QString( "Negative number in diff_fromDelta: %1" ).arg( param ); + } + QString text; + text = safeMid( text1, pointer, n ); + pointer += n; + if ( token[ 0 ] == QChar( '=' ) ) + { + diffs.append( Diff( EQUAL, text ) ); + } + else + { + diffs.append( Diff( DELETE, text ) ); + } + break; + } + default: + throw QString( "Invalid diff operation in diff_fromDelta: %1" ).arg( token[ 0 ] ); } - break; - } - default: - throw QString("Invalid diff operation in diff_fromDelta: %1") - .arg(token[0]); - } - } - if (pointer != text1.length()) { - throw QString("Delta length (%1) smaller than source text length (%2)") - .arg(pointer).arg(text1.length()); - } - return diffs; + } + if ( pointer != text1.length() ) + { + throw QString( "Delta length (%1) smaller than source text length (%2)" ).arg( pointer ).arg( text1.length() ); + } + return diffs; } +// MATCH FUNCTIONS - // MATCH FUNCTIONS - - -int diff_match_patch::match_main(const QString &text, const QString &pattern, - int loc) { - // Check for null inputs. - if (text.isNull() || pattern.isNull()) { - throw "Null inputs. (match_main)"; - } +int diff_match_patch::match_main( const QString &text, const QString &pattern, int loc ) +{ + // Check for null inputs. + if ( text.isNull() || pattern.isNull() ) + { + throw "Null inputs. 
(match_main)"; + } - loc = std::max(0, std::min(loc, text.length())); - if (text == pattern) { - // Shortcut (potentially not guaranteed by the algorithm) - return 0; - } else if (text.isEmpty()) { - // Nothing to match. - return -1; - } else if (loc + pattern.length() <= text.length() - && safeMid(text, loc, pattern.length()) == pattern) { - // Perfect match at the perfect spot! (Includes case of null pattern) - return loc; - } else { - // Do a fuzzy compare. - return match_bitap(text, pattern, loc); - } + loc = std::max( 0, std::min( loc, text.length() ) ); + if ( text == pattern ) + { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } + else if ( text.isEmpty() ) + { + // Nothing to match. + return -1; + } + else if ( loc + pattern.length() <= text.length() && safeMid( text, loc, pattern.length() ) == pattern ) + { + // Perfect match at the perfect spot! (Includes case of null pattern) + return loc; + } + else + { + // Do a fuzzy compare. + return match_bitap( text, pattern, loc ); + } } +int diff_match_patch::match_bitap( const QString &text, const QString &pattern, int loc ) +{ + if ( !( Match_MaxBits == 0 || pattern.length() <= Match_MaxBits ) ) + { + throw "Pattern too long for this application."; + } + + // Initialise the alphabet. + QMap< QChar, int > s = match_alphabet( pattern ); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + int best_loc = text.indexOf( pattern, loc ); + if ( best_loc != -1 ) + { + score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); + // What about in the other direction? 
(speedup) + best_loc = text.lastIndexOf( pattern, loc + pattern.length() ); + if ( best_loc != -1 ) + { + score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); + } + } -int diff_match_patch::match_bitap(const QString &text, const QString &pattern, - int loc) { - if (!(Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)) { - throw "Pattern too long for this application."; - } - - // Initialise the alphabet. - QMap s = match_alphabet(pattern); - - // Highest score beyond which we give up. - double score_threshold = Match_Threshold; - // Is there a nearby exact match? (speedup) - int best_loc = text.indexOf(pattern, loc); - if (best_loc != -1) { - score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), - score_threshold); - // What about in the other direction? (speedup) - best_loc = text.lastIndexOf(pattern, loc + pattern.length()); - if (best_loc != -1) { - score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), - score_threshold); - } - } - - // Initialise the bit arrays. - int matchmask = 1 << (pattern.length() - 1); - best_loc = -1; - - int bin_min, bin_mid; - int bin_max = pattern.length() + text.length(); - int *rd; - int *last_rd = NULL; - for (int d = 0; d < pattern.length(); d++) { - // Scan for the best match; each iteration allows for one more error. - // Run a binary search to determine how far from 'loc' we can stray at - // this error level. - bin_min = 0; - bin_mid = bin_max; - while (bin_min < bin_mid) { - if (match_bitapScore(d, loc + bin_mid, loc, pattern) - <= score_threshold) { - bin_min = bin_mid; - } else { + // Initialise the bit arrays. + int matchmask = 1 << ( pattern.length() - 1 ); + best_loc = -1; + + int bin_min, bin_mid; + int bin_max = pattern.length() + text.length(); + int *rd; + int *last_rd = NULL; + for ( int d = 0; d < pattern.length(); d++ ) + { + // Scan for the best match; each iteration allows for one more error. 
+ // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while ( bin_min < bin_mid ) + { + if ( match_bitapScore( d, loc + bin_mid, loc, pattern ) <= score_threshold ) + { + bin_min = bin_mid; + } + else + { + bin_max = bin_mid; + } + bin_mid = ( bin_max - bin_min ) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. bin_max = bin_mid; - } - bin_mid = (bin_max - bin_min) / 2 + bin_min; - } - // Use the result from this iteration as the maximum for the next. - bin_max = bin_mid; - int start = std::max(1, loc - bin_mid + 1); - int finish = std::min(loc + bin_mid, text.length()) + pattern.length(); - - rd = new int[finish + 2]; - rd[finish + 1] = (1 << d) - 1; - for (int j = finish; j >= start; j--) { - int charMatch; - if (text.length() <= j - 1) { - // Out of range. - charMatch = 0; - } else { - charMatch = s.value(text[j - 1], 0); - } - if (d == 0) { - // First pass: exact match. - rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; - } else { - // Subsequent passes: fuzzy match. - rd[j] = ((rd[j + 1] << 1) | 1) & charMatch - | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) - | last_rd[j + 1]; - } - if ((rd[j] & matchmask) != 0) { - double score = match_bitapScore(d, j - 1, loc, pattern); - // This match will almost certainly be better than any existing - // match. But check anyway. - if (score <= score_threshold) { - // Told you so. - score_threshold = score; - best_loc = j - 1; - if (best_loc > loc) { - // When passing loc, don't exceed our current distance from loc. - start = std::max(1, 2 * loc - best_loc); - } else { - // Already passed loc, downhill from here on in. 
+ int start = std::max( 1, loc - bin_mid + 1 ); + int finish = std::min( loc + bin_mid, text.length() ) + pattern.length(); + + rd = new int[ finish + 2 ]; + rd[ finish + 1 ] = ( 1 << d ) - 1; + for ( int j = finish; j >= start; j-- ) + { + int charMatch; + if ( text.length() <= j - 1 ) + { + // Out of range. + charMatch = 0; + } + else + { + charMatch = s.value( text[ j - 1 ], 0 ); + } + if ( d == 0 ) + { + // First pass: exact match. + rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch; + } + else + { + // Subsequent passes: fuzzy match. + rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch | ( ( ( last_rd[ j + 1 ] | last_rd[ j ] ) << 1 ) | 1 ) | last_rd[ j + 1 ]; + } + if ( ( rd[ j ] & matchmask ) != 0 ) + { + double score = match_bitapScore( d, j - 1, loc, pattern ); + // This match will almost certainly be better than any existing + // match. But check anyway. + if ( score <= score_threshold ) + { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if ( best_loc > loc ) + { + // When passing loc, don't exceed our current distance from loc. + start = std::max( 1, 2 * loc - best_loc ); + } + else + { + // Already passed loc, downhill from here on in. + break; + } + } + } + } + if ( match_bitapScore( d + 1, loc, loc, pattern ) > score_threshold ) + { + // No hope for a (better) match at greater error levels. break; - } } - } - } - if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { - // No hope for a (better) match at greater error levels. - break; - } - delete [] last_rd; - last_rd = rd; - } - delete [] last_rd; - delete [] rd; - return best_loc; + delete[] last_rd; + last_rd = rd; + } + delete[] last_rd; + delete[] rd; + return best_loc; } - -double diff_match_patch::match_bitapScore(int e, int x, int loc, - const QString &pattern) { - const float accuracy = static_cast (e) / pattern.length(); - const int proximity = qAbs(loc - x); - if (Match_Distance == 0) { - // Dodge divide by zero error. - return proximity == 0 ? 
accuracy : 1.0; - } - return accuracy + (proximity / static_cast (Match_Distance)); +double diff_match_patch::match_bitapScore( int e, int x, int loc, const QString &pattern ) +{ + const float accuracy = static_cast< float >( e ) / pattern.length(); + const int proximity = qAbs( loc - x ); + if ( Match_Distance == 0 ) + { + // Dodge divide by zero error. + return proximity == 0 ? accuracy : 1.0; + } + return accuracy + ( proximity / static_cast< float >( Match_Distance ) ); } - -QMap diff_match_patch::match_alphabet(const QString &pattern) { - QMap s; - int i; - for (i = 0; i < pattern.length(); i++) { - QChar c = pattern[i]; - s.insert(c, 0); - } - for (i = 0; i < pattern.length(); i++) { - QChar c = pattern[i]; - s.insert(c, s.value(c) | (1 << (pattern.length() - i - 1))); - } - return s; +QMap< QChar, int > diff_match_patch::match_alphabet( const QString &pattern ) +{ + QMap< QChar, int > s; + int i; + for ( i = 0; i < pattern.length(); i++ ) + { + QChar c = pattern[ i ]; + s.insert( c, 0 ); + } + for ( i = 0; i < pattern.length(); i++ ) + { + QChar c = pattern[ i ]; + s.insert( c, s.value( c ) | ( 1 << ( pattern.length() - i - 1 ) ) ); + } + return s; } - // PATCH FUNCTIONS +void diff_match_patch::patch_addContext( Patch &patch, const QString &text ) +{ + if ( text.isEmpty() ) + { + return; + } + QString pattern = safeMid( text, patch.start2, patch.length1 ); + int padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while ( text.indexOf( pattern ) != text.lastIndexOf( pattern ) && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin ) + { + padding += Patch_Margin; + pattern = safeMid( text, std::max( 0, patch.start2 - padding ), std::min( text.length(), patch.start2 + patch.length1 + padding ) - std::max( 0, patch.start2 - padding ) ); + } + // Add one chunk for good luck. 
+ padding += Patch_Margin; -void diff_match_patch::patch_addContext(Patch &patch, const QString &text) { - if (text.isEmpty()) { - return; - } - QString pattern = safeMid(text, patch.start2, patch.length1); - int padding = 0; + // Add the prefix. + QString prefix = safeMid( text, std::max( 0, patch.start2 - padding ), patch.start2 - std::max( 0, patch.start2 - padding ) ); + if ( !prefix.isEmpty() ) + { + patch.diffs.prepend( Diff( EQUAL, prefix ) ); + } + // Add the suffix. + QString suffix = safeMid( text, patch.start2 + patch.length1, std::min( text.length(), patch.start2 + patch.length1 + padding ) - ( patch.start2 + patch.length1 ) ); + if ( !suffix.isEmpty() ) + { + patch.diffs.append( Diff( EQUAL, suffix ) ); + } - // Look for the first and last matches of pattern in text. If two different - // matches are found, increase the pattern length. - while (text.indexOf(pattern) != text.lastIndexOf(pattern) - && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) { - padding += Patch_Margin; - pattern = safeMid(text, std::max(0, patch.start2 - padding), - std::min(text.length(), patch.start2 + patch.length1 + padding) - - std::max(0, patch.start2 - padding)); - } - // Add one chunk for good luck. - padding += Patch_Margin; - - // Add the prefix. - QString prefix = safeMid(text, std::max(0, patch.start2 - padding), - patch.start2 - std::max(0, patch.start2 - padding)); - if (!prefix.isEmpty()) { - patch.diffs.prepend(Diff(EQUAL, prefix)); - } - // Add the suffix. - QString suffix = safeMid(text, patch.start2 + patch.length1, - std::min(text.length(), patch.start2 + patch.length1 + padding) - - (patch.start2 + patch.length1)); - if (!suffix.isEmpty()) { - patch.diffs.append(Diff(EQUAL, suffix)); - } - - // Roll back the start points. - patch.start1 -= prefix.length(); - patch.start2 -= prefix.length(); - // Extend the lengths. 
- patch.length1 += prefix.length() + suffix.length(); - patch.length2 += prefix.length() + suffix.length(); + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths. + patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); } +QList< Patch > diff_match_patch::patch_make( const QString &text1, const QString &text2 ) +{ + // Check for null inputs. + if ( text1.isNull() || text2.isNull() ) + { + throw "Null inputs. (patch_make)"; + } -QList diff_match_patch::patch_make(const QString &text1, - const QString &text2) { - // Check for null inputs. - if (text1.isNull() || text2.isNull()) { - throw "Null inputs. (patch_make)"; - } - - // No diffs provided, compute our own. - QList diffs = diff_main(text1, text2, true); - if (diffs.size() > 2) { - diff_cleanupSemantic(diffs); - diff_cleanupEfficiency(diffs); - } + // No diffs provided, compute our own. + QList< Diff > diffs = diff_main( text1, text2, true ); + if ( diffs.size() > 2 ) + { + diff_cleanupSemantic( diffs ); + diff_cleanupEfficiency( diffs ); + } - return patch_make(text1, diffs); + return patch_make( text1, diffs ); } - -QList diff_match_patch::patch_make(const QList &diffs) { - // No origin string provided, compute our own. - const QString text1 = diff_text1(diffs); - return patch_make(text1, diffs); +QList< Patch > diff_match_patch::patch_make( const QList< Diff > &diffs ) +{ + // No origin string provided, compute our own. + const QString text1 = diff_text1( diffs ); + return patch_make( text1, diffs ); } +QList< Patch > diff_match_patch::patch_make( const QString &text1, const QString &text2, const QList< Diff > &diffs ) +{ + // text2 is entirely unused. + return patch_make( text1, diffs ); -QList diff_match_patch::patch_make(const QString &text1, - const QString &text2, - const QList &diffs) { - // text2 is entirely unused. 
- return patch_make(text1, diffs); - - Q_UNUSED(text2) + Q_UNUSED( text2 ) } +QList< Patch > diff_match_patch::patch_make( const QString &text1, const QList< Diff > &diffs ) +{ + // Check for null inputs. + if ( text1.isNull() ) + { + throw "Null inputs. (patch_make)"; + } -QList diff_match_patch::patch_make(const QString &text1, - const QList &diffs) { - // Check for null inputs. - if (text1.isNull()) { - throw "Null inputs. (patch_make)"; - } - - QList patches; - if (diffs.isEmpty()) { - return patches; // Get rid of the null case. - } - Patch patch; - int char_count1 = 0; // Number of characters into the text1 string. - int char_count2 = 0; // Number of characters into the text2 string. - // Start with text1 (prepatch_text) and apply the diffs until we arrive at - // text2 (postpatch_text). We recreate the patches one by one to determine - // context info. - QString prepatch_text = text1; - QString postpatch_text = text1; - foreach(Diff aDiff, diffs) { - if (patch.diffs.isEmpty() && aDiff.operation != EQUAL) { - // A new patch starts here. - patch.start1 = char_count1; - patch.start2 = char_count2; - } - - switch (aDiff.operation) { - case INSERT: - patch.diffs.append(aDiff); - patch.length2 += aDiff.text.length(); - postpatch_text = postpatch_text.left(char_count2) - + aDiff.text + safeMid(postpatch_text, char_count2); - break; - case DELETE: - patch.length1 += aDiff.text.length(); - patch.diffs.append(aDiff); - postpatch_text = postpatch_text.left(char_count2) - + safeMid(postpatch_text, char_count2 + aDiff.text.length()); - break; - case EQUAL: - if (aDiff.text.length() <= 2 * Patch_Margin - && !patch.diffs.isEmpty() && !(aDiff == diffs.back())) { - // Small equality inside a patch. - patch.diffs.append(aDiff); - patch.length1 += aDiff.text.length(); - patch.length2 += aDiff.text.length(); + QList< Patch > patches; + if ( diffs.isEmpty() ) + { + return patches; // Get rid of the null case. 
+ } + Patch patch; + int char_count1 = 0; // Number of characters into the text1 string. + int char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + QString prepatch_text = text1; + QString postpatch_text = text1; + foreach( Diff aDiff, diffs ) + { + if ( patch.diffs.isEmpty() && aDiff.operation != EQUAL ) + { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; } - if (aDiff.text.length() >= 2 * Patch_Margin) { - // Time for a new patch. - if (!patch.diffs.isEmpty()) { - patch_addContext(patch, prepatch_text); - patches.append(patch); - patch = Patch(); - // Unlike Unidiff, our patch lists have a rolling context. - // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff - // Update prepatch text & pos to reflect the application of the - // just completed patch. - prepatch_text = postpatch_text; - char_count1 = char_count2; - } + switch ( aDiff.operation ) + { + case INSERT: + patch.diffs.append( aDiff ); + patch.length2 += aDiff.text.length(); + postpatch_text = postpatch_text.left( char_count2 ) + aDiff.text + safeMid( postpatch_text, char_count2 ); + break; + case DELETE: + patch.length1 += aDiff.text.length(); + patch.diffs.append( aDiff ); + postpatch_text = postpatch_text.left( char_count2 ) + safeMid( postpatch_text, char_count2 + aDiff.text.length() ); + break; + case EQUAL: + if ( aDiff.text.length() <= 2 * Patch_Margin && !patch.diffs.isEmpty() && !( aDiff == diffs.back() ) ) + { + // Small equality inside a patch. + patch.diffs.append( aDiff ); + patch.length1 += aDiff.text.length(); + patch.length2 += aDiff.text.length(); + } + + if ( aDiff.text.length() >= 2 * Patch_Margin ) + { + // Time for a new patch. 
+ if ( !patch.diffs.isEmpty() ) + { + patch_addContext( patch, prepatch_text ); + patches.append( patch ); + patch = Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; } - break; - } - // Update the current character count. - if (aDiff.operation != INSERT) { - char_count1 += aDiff.text.length(); + // Update the current character count. + if ( aDiff.operation != INSERT ) + { + char_count1 += aDiff.text.length(); + } + if ( aDiff.operation != DELETE ) + { + char_count2 += aDiff.text.length(); + } } - if (aDiff.operation != DELETE) { - char_count2 += aDiff.text.length(); + // Pick up the leftover patch if not empty. + if ( !patch.diffs.isEmpty() ) + { + patch_addContext( patch, prepatch_text ); + patches.append( patch ); } - } - // Pick up the leftover patch if not empty. 
- if (!patch.diffs.isEmpty()) { - patch_addContext(patch, prepatch_text); - patches.append(patch); - } - return patches; + return patches; } - -QList diff_match_patch::patch_deepCopy(QList &patches) { - QList patchesCopy; - foreach(Patch aPatch, patches) { - Patch patchCopy = Patch(); - foreach(Diff aDiff, aPatch.diffs) { - Diff diffCopy = Diff(aDiff.operation, aDiff.text); - patchCopy.diffs.append(diffCopy); - } - patchCopy.start1 = aPatch.start1; - patchCopy.start2 = aPatch.start2; - patchCopy.length1 = aPatch.length1; - patchCopy.length2 = aPatch.length2; - patchesCopy.append(patchCopy); - } - return patchesCopy; +QList< Patch > diff_match_patch::patch_deepCopy( QList< Patch > &patches ) +{ + QList< Patch > patchesCopy; + foreach( Patch aPatch, patches ) + { + Patch patchCopy = Patch(); + foreach( Diff aDiff, aPatch.diffs ) + { + Diff diffCopy = Diff( aDiff.operation, aDiff.text ); + patchCopy.diffs.append( diffCopy ); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = aPatch.length2; + patchesCopy.append( patchCopy ); + } + return patchesCopy; } +QPair< QString, QVector< bool > > diff_match_patch::patch_apply( QList< Patch > &patches, const QString &sourceText ) +{ + QString text = sourceText; // Copy to preserve original. + if ( patches.isEmpty() ) + { + return QPair< QString, QVector< bool > >( text, QVector< bool >( 0 ) ); + } -QPair > diff_match_patch::patch_apply( - QList &patches, const QString &sourceText) { - QString text = sourceText; // Copy to preserve original. - if (patches.isEmpty()) { - return QPair >(text, QVector(0)); - } - - // Deep copy the patches so that no changes are made to originals. 
- QList patchesCopy = patch_deepCopy(patches); - - QString nullPadding = patch_addPadding(patchesCopy); - text = nullPadding + text + nullPadding; - patch_splitMax(patchesCopy); - - int x = 0; - // delta keeps track of the offset between the expected and actual location - // of the previous patch. If there are patches expected at positions 10 and - // 20, but the first patch was found at 12, delta is 2 and the second patch - // has an effective expected position of 22. - int delta = 0; - QVector results(patchesCopy.size()); - foreach(Patch aPatch, patchesCopy) { - int expected_loc = aPatch.start2 + delta; - QString text1 = diff_text1(aPatch.diffs); - int start_loc; - int end_loc = -1; - if (text1.length() > Match_MaxBits) { - // patch_splitMax will only provide an oversized pattern in the case of - // a monster delete. - start_loc = match_main(text, text1.left(Match_MaxBits), expected_loc); - if (start_loc != -1) { - end_loc = match_main(text, text1.right(Match_MaxBits), - expected_loc + text1.length() - Match_MaxBits); - if (end_loc == -1 || start_loc >= end_loc) { - // Can't find valid trailing context. Drop this patch. - start_loc = -1; + // Deep copy the patches so that no changes are made to originals. + QList< Patch > patchesCopy = patch_deepCopy( patches ); + + QString nullPadding = patch_addPadding( patchesCopy ); + text = nullPadding + text + nullPadding; + patch_splitMax( patchesCopy ); + + int x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. 
+ int delta = 0; + QVector< bool > results( patchesCopy.size() ); + foreach( Patch aPatch, patchesCopy ) + { + int expected_loc = aPatch.start2 + delta; + QString text1 = diff_text1( aPatch.diffs ); + int start_loc; + int end_loc = -1; + if ( text1.length() > Match_MaxBits ) + { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. + start_loc = match_main( text, text1.left( Match_MaxBits ), expected_loc ); + if ( start_loc != -1 ) + { + end_loc = match_main( text, text1.right( Match_MaxBits ), expected_loc + text1.length() - Match_MaxBits ); + if ( end_loc == -1 || start_loc >= end_loc ) + { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } + else + { + start_loc = match_main( text, text1, expected_loc ); } - } - } else { - start_loc = match_main(text, text1, expected_loc); - } - if (start_loc == -1) { - // No match found. :( - results[x] = false; - // Subtract the delta for this failed patch from subsequent patches. - delta -= aPatch.length2 - aPatch.length1; - } else { - // Found a match. :) - results[x] = true; - delta = start_loc - expected_loc; - QString text2; - if (end_loc == -1) { - text2 = safeMid(text, start_loc, text1.length()); - } else { - text2 = safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc); - } - if (text1 == text2) { - // Perfect match, just shove the replacement text in. - text = text.left(start_loc) + diff_text2(aPatch.diffs) - + safeMid(text, start_loc + text1.length()); - } else { - // Imperfect match. Run a diff to get a framework of equivalent - // indices. - QList diffs = diff_main(text1, text2, false); - if (text1.length() > Match_MaxBits - && diff_levenshtein(diffs) / static_cast (text1.length()) - > Patch_DeleteThreshold) { - // The end points match, but the content is unacceptably bad. 
- results[x] = false; - } else { - diff_cleanupSemanticLossless(diffs); - int index1 = 0; - foreach(Diff aDiff, aPatch.diffs) { - if (aDiff.operation != EQUAL) { - int index2 = diff_xIndex(diffs, index1); - if (aDiff.operation == INSERT) { - // Insertion - text = text.left(start_loc + index2) + aDiff.text - + safeMid(text, start_loc + index2); - } else if (aDiff.operation == DELETE) { - // Deletion - text = text.left(start_loc + index2) - + safeMid(text, start_loc + diff_xIndex(diffs, - index1 + aDiff.text.length())); - } + if ( start_loc == -1 ) + { + // No match found. :( + results[ x ] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } + else + { + // Found a match. :) + results[ x ] = true; + delta = start_loc - expected_loc; + QString text2; + if ( end_loc == -1 ) + { + text2 = safeMid( text, start_loc, text1.length() ); + } + else + { + text2 = safeMid( text, start_loc, end_loc + Match_MaxBits - start_loc ); } - if (aDiff.operation != DELETE) { - index1 += aDiff.text.length(); + if ( text1 == text2 ) + { + // Perfect match, just shove the replacement text in. + text = text.left( start_loc ) + diff_text2( aPatch.diffs ) + safeMid( text, start_loc + text1.length() ); + } + else + { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + QList< Diff > diffs = diff_main( text1, text2, false ); + if ( text1.length() > Match_MaxBits && diff_levenshtein( diffs ) / static_cast< float >( text1.length() ) > Patch_DeleteThreshold ) + { + // The end points match, but the content is unacceptably bad. 
+ results[ x ] = false; + } + else + { + diff_cleanupSemanticLossless( diffs ); + int index1 = 0; + foreach( Diff aDiff, aPatch.diffs ) + { + if ( aDiff.operation != EQUAL ) + { + int index2 = diff_xIndex( diffs, index1 ); + if ( aDiff.operation == INSERT ) + { + // Insertion + text = text.left( start_loc + index2 ) + aDiff.text + safeMid( text, start_loc + index2 ); + } + else if ( aDiff.operation == DELETE ) + { + // Deletion + text = text.left( start_loc + index2 ) + safeMid( text, start_loc + diff_xIndex( diffs, index1 + aDiff.text.length() ) ); + } + } + if ( aDiff.operation != DELETE ) + { + index1 += aDiff.text.length(); + } + } + } } - } } - } - } - x++; - } - // Strip the padding off. - text = safeMid(text, nullPadding.length(), text.length() - - 2 * nullPadding.length()); - return QPair >(text, results); + x++; + } + // Strip the padding off. + text = safeMid( text, nullPadding.length(), text.length() - 2 * nullPadding.length() ); + return QPair< QString, QVector< bool > >( text, results ); } +QString diff_match_patch::patch_addPadding( QList< Patch > &patches ) +{ + short paddingLength = Patch_Margin; + QString nullPadding = ""; + for ( short x = 1; x <= paddingLength; x++ ) + { + nullPadding += QChar( (ushort)x ); + } + + // Bump all the patches forward. + QMutableListIterator< Patch > pointer( patches ); + while ( pointer.hasNext() ) + { + Patch &aPatch = pointer.next(); + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + Patch &firstPatch = patches.first(); + QList< Diff > &firstPatchDiffs = firstPatch.diffs; + if ( firstPatchDiffs.empty() || firstPatchDiffs.first().operation != EQUAL ) + { + // Add nullPadding equality. + firstPatchDiffs.prepend( Diff( EQUAL, nullPadding ) ); + firstPatch.start1 -= paddingLength; // Should be 0. + firstPatch.start2 -= paddingLength; // Should be 0. 
+ firstPatch.length1 += paddingLength; + firstPatch.length2 += paddingLength; + } + else if ( paddingLength > firstPatchDiffs.first().text.length() ) + { + // Grow first equality. + Diff &firstDiff = firstPatchDiffs.first(); + int extraLength = paddingLength - firstDiff.text.length(); + firstDiff.text = safeMid( nullPadding, firstDiff.text.length(), paddingLength - firstDiff.text.length() ) + firstDiff.text; + firstPatch.start1 -= extraLength; + firstPatch.start2 -= extraLength; + firstPatch.length1 += extraLength; + firstPatch.length2 += extraLength; + } + + // Add some padding on end of last diff. + Patch &lastPatch = patches.first(); + QList< Diff > &lastPatchDiffs = lastPatch.diffs; + if ( lastPatchDiffs.empty() || lastPatchDiffs.last().operation != EQUAL ) + { + // Add nullPadding equality. + lastPatchDiffs.append( Diff( EQUAL, nullPadding ) ); + lastPatch.length1 += paddingLength; + lastPatch.length2 += paddingLength; + } + else if ( paddingLength > lastPatchDiffs.last().text.length() ) + { + // Grow last equality. + Diff &lastDiff = lastPatchDiffs.last(); + int extraLength = paddingLength - lastDiff.text.length(); + lastDiff.text += nullPadding.left( extraLength ); + lastPatch.length1 += extraLength; + lastPatch.length2 += extraLength; + } -QString diff_match_patch::patch_addPadding(QList &patches) { - short paddingLength = Patch_Margin; - QString nullPadding = ""; - for (short x = 1; x <= paddingLength; x++) { - nullPadding += QChar((ushort)x); - } - - // Bump all the patches forward. - QMutableListIterator pointer(patches); - while (pointer.hasNext()) { - Patch &aPatch = pointer.next(); - aPatch.start1 += paddingLength; - aPatch.start2 += paddingLength; - } - - // Add some padding on start of first diff. - Patch &firstPatch = patches.first(); - QList &firstPatchDiffs = firstPatch.diffs; - if (firstPatchDiffs.empty() || firstPatchDiffs.first().operation != EQUAL) { - // Add nullPadding equality. 
- firstPatchDiffs.prepend(Diff(EQUAL, nullPadding)); - firstPatch.start1 -= paddingLength; // Should be 0. - firstPatch.start2 -= paddingLength; // Should be 0. - firstPatch.length1 += paddingLength; - firstPatch.length2 += paddingLength; - } else if (paddingLength > firstPatchDiffs.first().text.length()) { - // Grow first equality. - Diff &firstDiff = firstPatchDiffs.first(); - int extraLength = paddingLength - firstDiff.text.length(); - firstDiff.text = safeMid(nullPadding, firstDiff.text.length(), - paddingLength - firstDiff.text.length()) + firstDiff.text; - firstPatch.start1 -= extraLength; - firstPatch.start2 -= extraLength; - firstPatch.length1 += extraLength; - firstPatch.length2 += extraLength; - } - - // Add some padding on end of last diff. - Patch &lastPatch = patches.first(); - QList &lastPatchDiffs = lastPatch.diffs; - if (lastPatchDiffs.empty() || lastPatchDiffs.last().operation != EQUAL) { - // Add nullPadding equality. - lastPatchDiffs.append(Diff(EQUAL, nullPadding)); - lastPatch.length1 += paddingLength; - lastPatch.length2 += paddingLength; - } else if (paddingLength > lastPatchDiffs.last().text.length()) { - // Grow last equality. 
- Diff &lastDiff = lastPatchDiffs.last(); - int extraLength = paddingLength - lastDiff.text.length(); - lastDiff.text += nullPadding.left(extraLength); - lastPatch.length1 += extraLength; - lastPatch.length2 += extraLength; - } - - return nullPadding; + return nullPadding; } +void diff_match_patch::patch_splitMax( QList< Patch > &patches ) +{ + short patch_size = Match_MaxBits; + QString precontext, postcontext; + Patch patch; + int start1, start2; + bool empty; + Operation diff_type; + QString diff_text; + QMutableListIterator< Patch > pointer( patches ); + Patch bigpatch; + + if ( pointer.hasNext() ) + { + bigpatch = pointer.next(); + } -void diff_match_patch::patch_splitMax(QList &patches) { - short patch_size = Match_MaxBits; - QString precontext, postcontext; - Patch patch; - int start1, start2; - bool empty; - Operation diff_type; - QString diff_text; - QMutableListIterator pointer(patches); - Patch bigpatch; - - if (pointer.hasNext()) { - bigpatch = pointer.next(); - } - - while (!bigpatch.isNull()) { - if (bigpatch.length1 <= patch_size) { - bigpatch = pointer.hasNext() ? pointer.next() : Patch(); - continue; - } - // Remove the big old patch. - pointer.remove(); - start1 = bigpatch.start1; - start2 = bigpatch.start2; - precontext = ""; - while (!bigpatch.diffs.isEmpty()) { - // Create one of several smaller patches. - patch = Patch(); - empty = true; - patch.start1 = start1 - precontext.length(); - patch.start2 = start2 - precontext.length(); - if (!precontext.isEmpty()) { - patch.length1 = patch.length2 = precontext.length(); - patch.diffs.append(Diff(EQUAL, precontext)); - } - while (!bigpatch.diffs.isEmpty() - && patch.length1 < patch_size - Patch_Margin) { - diff_type = bigpatch.diffs.front().operation; - diff_text = bigpatch.diffs.front().text; - if (diff_type == INSERT) { - // Insertions are harmless. 
- patch.length2 += diff_text.length(); - start2 += diff_text.length(); - patch.diffs.append(bigpatch.diffs.front()); - bigpatch.diffs.removeFirst(); - empty = false; - } else if (diff_type == DELETE && patch.diffs.size() == 1 - && patch.diffs.front().operation == EQUAL - && diff_text.length() > 2 * patch_size) { - // This is a large deletion. Let it pass in one chunk. - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - empty = false; - patch.diffs.append(Diff(diff_type, diff_text)); - bigpatch.diffs.removeFirst(); - } else { - // Deletion or equality. Only take as much as we can stomach. - diff_text = diff_text.left(std::min(diff_text.length(), - patch_size - patch.length1 - Patch_Margin)); - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - if (diff_type == EQUAL) { - patch.length2 += diff_text.length(); - start2 += diff_text.length(); - } else { - empty = false; - } - patch.diffs.append(Diff(diff_type, diff_text)); - if (diff_text == bigpatch.diffs.front().text) { - bigpatch.diffs.removeFirst(); - } else { - bigpatch.diffs.front().text = safeMid(bigpatch.diffs.front().text, - diff_text.length()); - } + while ( !bigpatch.isNull() ) + { + if ( bigpatch.length1 <= patch_size ) + { + bigpatch = pointer.hasNext() ? pointer.next() : Patch(); + continue; } - } - // Compute the head context for the next patch. - precontext = diff_text2(patch.diffs); - precontext = safeMid(precontext, precontext.length() - Patch_Margin); - // Append the end context for this patch. 
- if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { - postcontext = diff_text1(bigpatch.diffs).left(Patch_Margin); - } else { - postcontext = diff_text1(bigpatch.diffs); - } - if (!postcontext.isEmpty()) { - patch.length1 += postcontext.length(); - patch.length2 += postcontext.length(); - if (!patch.diffs.isEmpty() - && patch.diffs.back().operation == EQUAL) { - patch.diffs.back().text += postcontext; - } else { - patch.diffs.append(Diff(EQUAL, postcontext)); + // Remove the big old patch. + pointer.remove(); + start1 = bigpatch.start1; + start2 = bigpatch.start2; + precontext = ""; + while ( !bigpatch.diffs.isEmpty() ) + { + // Create one of several smaller patches. + patch = Patch(); + empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if ( !precontext.isEmpty() ) + { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.append( Diff( EQUAL, precontext ) ); + } + while ( !bigpatch.diffs.isEmpty() && patch.length1 < patch_size - Patch_Margin ) + { + diff_type = bigpatch.diffs.front().operation; + diff_text = bigpatch.diffs.front().text; + if ( diff_type == INSERT ) + { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.append( bigpatch.diffs.front() ); + bigpatch.diffs.removeFirst(); + empty = false; + } + else if ( diff_type == DELETE && patch.diffs.size() == 1 && patch.diffs.front().operation == EQUAL && diff_text.length() > 2 * patch_size ) + { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.append( Diff( diff_type, diff_text ) ); + bigpatch.diffs.removeFirst(); + } + else + { + // Deletion or equality. Only take as much as we can stomach. 
+ diff_text = diff_text.left( std::min( diff_text.length(), patch_size - patch.length1 - Patch_Margin ) ); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if ( diff_type == EQUAL ) + { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } + else + { + empty = false; + } + patch.diffs.append( Diff( diff_type, diff_text ) ); + if ( diff_text == bigpatch.diffs.front().text ) + { + bigpatch.diffs.removeFirst(); + } + else + { + bigpatch.diffs.front().text = safeMid( bigpatch.diffs.front().text, diff_text.length() ); + } + } + } + // Compute the head context for the next patch. + precontext = diff_text2( patch.diffs ); + precontext = safeMid( precontext, precontext.length() - Patch_Margin ); + // Append the end context for this patch. + if ( diff_text1( bigpatch.diffs ).length() > Patch_Margin ) + { + postcontext = diff_text1( bigpatch.diffs ).left( Patch_Margin ); + } + else + { + postcontext = diff_text1( bigpatch.diffs ); + } + if ( !postcontext.isEmpty() ) + { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if ( !patch.diffs.isEmpty() && patch.diffs.back().operation == EQUAL ) + { + patch.diffs.back().text += postcontext; + } + else + { + patch.diffs.append( Diff( EQUAL, postcontext ) ); + } + } + if ( !empty ) + { + pointer.insert( patch ); + } } - } - if (!empty) { - pointer.insert(patch); - } + bigpatch = pointer.hasNext() ? pointer.next() : Patch(); } - bigpatch = pointer.hasNext() ? 
pointer.next() : Patch(); - } } - -QString diff_match_patch::patch_toText(const QList &patches) { - QString text; - foreach(Patch aPatch, patches) { - text.append(aPatch.toString()); - } - return text; +QString diff_match_patch::patch_toText( const QList< Patch > &patches ) +{ + QString text; + foreach( Patch aPatch, patches ) + { + text.append( aPatch.toString() ); + } + return text; } +QList< Patch > diff_match_patch::patch_fromText( const QString &textline ) +{ + QList< Patch > patches; + if ( textline.isEmpty() ) + { + return patches; + } + QStringList text = textline.split( "\n", QString::SkipEmptyParts ); + Patch patch; + QRegExp patchHeader( "^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$" ); + char sign; + QString line; + while ( !text.isEmpty() ) + { + if ( !patchHeader.exactMatch( text.front() ) ) + { + throw QString( "Invalid patch string: %1" ).arg( text.front() ); + } + + patch = Patch(); + patch.start1 = patchHeader.cap( 1 ).toInt(); + if ( patchHeader.cap( 2 ).isEmpty() ) + { + patch.start1--; + patch.length1 = 1; + } + else if ( patchHeader.cap( 2 ) == "0" ) + { + patch.length1 = 0; + } + else + { + patch.start1--; + patch.length1 = patchHeader.cap( 2 ).toInt(); + } -QList diff_match_patch::patch_fromText(const QString &textline) { - QList patches; - if (textline.isEmpty()) { - return patches; - } - QStringList text = textline.split("\n", QString::SkipEmptyParts); - Patch patch; - QRegExp patchHeader("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); - char sign; - QString line; - while (!text.isEmpty()) { - if (!patchHeader.exactMatch(text.front())) { - throw QString("Invalid patch string: %1").arg(text.front()); - } - - patch = Patch(); - patch.start1 = patchHeader.cap(1).toInt(); - if (patchHeader.cap(2).isEmpty()) { - patch.start1--; - patch.length1 = 1; - } else if (patchHeader.cap(2) == "0") { - patch.length1 = 0; - } else { - patch.start1--; - patch.length1 = patchHeader.cap(2).toInt(); - } - - patch.start2 = patchHeader.cap(3).toInt(); - if 
(patchHeader.cap(4).isEmpty()) { - patch.start2--; - patch.length2 = 1; - } else if (patchHeader.cap(4) == "0") { - patch.length2 = 0; - } else { - patch.start2--; - patch.length2 = patchHeader.cap(4).toInt(); - } - text.removeFirst(); - - while (!text.isEmpty()) { - if (text.front().isEmpty()) { + patch.start2 = patchHeader.cap( 3 ).toInt(); + if ( patchHeader.cap( 4 ).isEmpty() ) + { + patch.start2--; + patch.length2 = 1; + } + else if ( patchHeader.cap( 4 ) == "0" ) + { + patch.length2 = 0; + } + else + { + patch.start2--; + patch.length2 = patchHeader.cap( 4 ).toInt(); + } text.removeFirst(); - continue; - } - sign = text.front()[0].toAscii(); - line = safeMid(text.front(), 1); - line = line.replace("+", "%2B"); // decode would change all "+" to " " - line = QUrl::fromPercentEncoding(qPrintable(line)); - if (sign == '-') { - // Deletion. - patch.diffs.append(Diff(DELETE, line)); - } else if (sign == '+') { - // Insertion. - patch.diffs.append(Diff(INSERT, line)); - } else if (sign == ' ') { - // Minor equality. - patch.diffs.append(Diff(EQUAL, line)); - } else if (sign == '@') { - // Start of next patch. - break; - } else { - // WTF? - throw QString("Invalid patch mode '%1' in: %2").arg(sign).arg(line); - return QList(); - } - text.removeFirst(); - } - - patches.append(patch); - - } - return patches; + + while ( !text.isEmpty() ) + { + if ( text.front().isEmpty() ) + { + text.removeFirst(); + continue; + } + sign = text.front()[ 0 ].toAscii(); + line = safeMid( text.front(), 1 ); + line = line.replace( "+", "%2B" ); // decode would change all "+" to " " + line = QUrl::fromPercentEncoding( qPrintable( line ) ); + if ( sign == '-' ) + { + // Deletion. + patch.diffs.append( Diff( DELETE, line ) ); + } + else if ( sign == '+' ) + { + // Insertion. + patch.diffs.append( Diff( INSERT, line ) ); + } + else if ( sign == ' ' ) + { + // Minor equality. + patch.diffs.append( Diff( EQUAL, line ) ); + } + else if ( sign == '@' ) + { + // Start of next patch. 
+ break; + } + else + { + // WTF? + throw QString( "Invalid patch mode '%1' in: %2" ).arg( sign ).arg( line ); + return QList< Patch >(); + } + text.removeFirst(); + } + + patches.append( patch ); + } + return patches; } diff --git a/cpp/diff_match_patch.h b/cpp/diff_match_patch.h index 82d32832..ee9ec44a 100644 --- a/cpp/diff_match_patch.h +++ b/cpp/diff_match_patch.h @@ -56,111 +56,109 @@ */ - /**- * The data structure representing a diff is a Linked list of Diff objects: * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), * Diff(Operation.EQUAL, " world.")} * which means: delete "Hello", add "Goodbye" and keep " world." */ -enum Operation { - DELETE, INSERT, EQUAL +enum Operation +{ + DELETE, + INSERT, + EQUAL }; - /** * Class representing one diff operation. */ -class Diff { - public: - Operation operation; - // One of: INSERT, DELETE or EQUAL. - QString text; - // The text associated with this diff operation. - - /** +class Diff +{ +public: + Operation operation; + // One of: INSERT, DELETE or EQUAL. + QString text; + // The text associated with this diff operation. + + /** * Constructor. Initializes the diff with the provided values. * @param operation One of INSERT, DELETE or EQUAL. * @param text The text being applied. */ - Diff(Operation _operation, const QString &_text); - Diff(); - inline bool isNull() const; - QString toString() const; - bool operator==(const Diff &d) const; - bool operator!=(const Diff &d) const; + Diff( Operation _operation, const QString &_text ); + Diff(); + inline bool isNull() const; + QString toString() const; + bool operator==( const Diff &d ) const; + bool operator!=( const Diff &d ) const; - static QString strOperation(Operation op); + static QString strOperation( Operation op ); }; - /** * Class representing one patch operation. 
*/ -class Patch { - public: - QList diffs; - int start1; - int start2; - int length1; - int length2; - - /** +class Patch +{ +public: + QList< Diff > diffs; + int start1; + int start2; + int length1; + int length2; + + /** * Constructor. Initializes with an empty list of diffs. */ - Patch(); - bool isNull() const; - QString toString(); + Patch(); + bool isNull() const; + QString toString(); }; - /** * Class containing the diff, match and patch methods. * Also contains the behaviour settings. */ -class diff_match_patch { - - friend class diff_match_patch_test; - - public: - // Defaults. - // Set these on your diff_match_patch instance to override the defaults. - - // Number of seconds to map a diff before giving up (0 for infinity). - float Diff_Timeout; - // Cost of an empty edit operation in terms of edit characters. - short Diff_EditCost; - // At what point is no match declared (0.0 = perfection, 1.0 = very loose). - float Match_Threshold; - // How far to search for a match (0 = exact location, 1000+ = broad match). - // A match this many characters away from the expected location will add - // 1.0 to the score (0.0 is a perfect match). - int Match_Distance; - // When deleting a large block of text (over ~64 characters), how close does - // the contents have to match the expected contents. (0.0 = perfection, - // 1.0 = very loose). Note that Match_Threshold controls how closely the - // end points of a delete need to match. - float Patch_DeleteThreshold; - // Chunk size for context length. - short Patch_Margin; - - // The number of bits in an int. - short Match_MaxBits; - - private: - // Define some regex patterns for matching boundaries. - static QRegExp BLANKLINEEND; - static QRegExp BLANKLINESTART; - - - public: - - diff_match_patch(); - - // DIFF FUNCTIONS - - - /** +class diff_match_patch +{ + friend class diff_match_patch_test; + +public: + // Defaults. + // Set these on your diff_match_patch instance to override the defaults. 
+ + // Number of seconds to map a diff before giving up (0 for infinity). + float Diff_Timeout; + // Cost of an empty edit operation in terms of edit characters. + short Diff_EditCost; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + float Match_Threshold; + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + int Match_Distance; + // When deleting a large block of text (over ~64 characters), how close does + // the contents have to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + float Patch_DeleteThreshold; + // Chunk size for context length. + short Patch_Margin; + + // The number of bits in an int. + short Match_MaxBits; + +private: + // Define some regex patterns for matching boundaries. + static QRegExp BLANKLINEEND; + static QRegExp BLANKLINESTART; + +public: + diff_match_patch(); + + // DIFF FUNCTIONS + + /** * Find the differences between two texts. * Run a faster slightly less optimal diff. * This method allows the 'checklines' of diff_main() to be optional. @@ -169,9 +167,9 @@ class diff_match_patch { * @param text2 New string to be diffed. * @return Linked List of Diff objects. */ - QList diff_main(const QString &text1, const QString &text2); + QList< Diff > diff_main( const QString &text1, const QString &text2 ); - /** + /** * Find the differences between two texts. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. @@ -180,9 +178,9 @@ class diff_match_patch { * If true, then run a faster slightly less optimal diff. * @return Linked List of Diff objects. 
*/ - QList diff_main(const QString &text1, const QString &text2, bool checklines); + QList< Diff > diff_main( const QString &text1, const QString &text2, bool checklines ); - /** + /** * Find the differences between two texts. Simplifies the problem by * stripping any common prefix or suffix off the texts before diffing. * @param text1 Old string to be diffed. @@ -194,10 +192,10 @@ class diff_match_patch { * internally for recursive calls. Users should set DiffTimeout instead. * @return Linked List of Diff objects. */ - private: - QList diff_main(const QString &text1, const QString &text2, bool checklines, clock_t deadline); +private: + QList< Diff > diff_main( const QString &text1, const QString &text2, bool checklines, clock_t deadline ); - /** + /** * Find the differences between two texts. Assumes that the texts do not * have any common prefix or suffix. * @param text1 Old string to be diffed. @@ -208,10 +206,10 @@ class diff_match_patch { * @param deadline Time when the diff should be complete by. * @return Linked List of Diff objects. */ - private: - QList diff_compute(QString text1, QString text2, bool checklines, clock_t deadline); +private: + QList< Diff > diff_compute( QString text1, QString text2, bool checklines, clock_t deadline ); - /** + /** * Do a quick line-level diff on both strings, then rediff the parts for * greater accuracy. * This speedup can produce non-minimal diffs. @@ -220,10 +218,10 @@ class diff_match_patch { * @param deadline Time when the diff should be complete by. * @return Linked List of Diff objects. */ - private: - QList diff_lineMode(QString text1, QString text2, clock_t deadline); +private: + QList< Diff > diff_lineMode( QString text1, QString text2, clock_t deadline ); - /** + /** * Find the 'middle snake' of a diff, split the problem in two * and return the recursively constructed diff. * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. 
@@ -231,10 +229,10 @@ class diff_match_patch { * @param text2 New string to be diffed. * @return Linked List of Diff objects. */ - protected: - QList diff_bisect(const QString &text1, const QString &text2, clock_t deadline); +protected: + QList< Diff > diff_bisect( const QString &text1, const QString &text2, clock_t deadline ); - /** + /** * Given the location of the 'middle snake', split the diff in two parts * and recurse. * @param text1 Old string to be diffed. @@ -244,10 +242,10 @@ class diff_match_patch { * @param deadline Time at which to bail if not yet complete. * @return LinkedList of Diff objects. */ - private: - QList diff_bisectSplit(const QString &text1, const QString &text2, int x, int y, clock_t deadline); +private: + QList< Diff > diff_bisectSplit( const QString &text1, const QString &text2, int x, int y, clock_t deadline ); - /** + /** * Split two texts into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text1 First string. @@ -256,10 +254,10 @@ class diff_match_patch { * encoded text2 and the List of unique strings. The zeroth element * of the List of unique strings is intentionally blank. */ - protected: - QList diff_linesToChars(const QString &text1, const QString &text2); // return elems 0 and 1 are QString, elem 2 is QStringList +protected: + QList< QVariant > diff_linesToChars( const QString &text1, const QString &text2 ); // return elems 0 and 1 are QString, elem 2 is QStringList - /** + /** * Split a text into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text String to encode. @@ -267,48 +265,47 @@ class diff_match_patch { * @param lineHash Map of strings to indices. * @return Encoded string. 
*/ - private: - QString diff_linesToCharsMunge(const QString &text, QStringList &lineArray, - QMap &lineHash); +private: + QString diff_linesToCharsMunge( const QString &text, QStringList &lineArray, QMap< QString, int > &lineHash ); - /** + /** * Rehydrate the text in a diff from a string of line hashes to real lines of * text. * @param diffs LinkedList of Diff objects. * @param lineArray List of unique strings. */ - private: - void diff_charsToLines(QList &diffs, const QStringList &lineArray); +private: + void diff_charsToLines( QList< Diff > &diffs, const QStringList &lineArray ); - /** + /** * Determine the common prefix of two strings. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the start of each string. */ - public: - int diff_commonPrefix(const QString &text1, const QString &text2); +public: + int diff_commonPrefix( const QString &text1, const QString &text2 ); - /** + /** * Determine the common suffix of two strings. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the end of each string. */ - public: - int diff_commonSuffix(const QString &text1, const QString &text2); +public: + int diff_commonSuffix( const QString &text1, const QString &text2 ); - /** + /** * Determine if the suffix of one string is the prefix of another. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the end of the first * string and the start of the second string. */ - protected: - int diff_commonOverlap(const QString &text1, const QString &text2); +protected: + int diff_commonOverlap( const QString &text1, const QString &text2 ); - /** + /** * Do the two texts share a substring which is at least half the length of * the longer text? * This speedup can produce non-minimal diffs. @@ -318,10 +315,10 @@ class diff_match_patch { * suffix of text1, the prefix of text2, the suffix of text2 and the * common middle. 
Or null if there was no match. */ - protected: - QStringList diff_halfMatch(const QString &text1, const QString &text2); +protected: + QStringList diff_halfMatch( const QString &text1, const QString &text2 ); - /** + /** * Does a substring of shorttext exist within longtext such that the * substring is at least half the length of longtext? * @param longtext Longer string. @@ -331,26 +328,26 @@ class diff_match_patch { * suffix of longtext, the prefix of shorttext, the suffix of shorttext * and the common middle. Or null if there was no match. */ - private: - QStringList diff_halfMatchI(const QString &longtext, const QString &shorttext, int i); +private: + QStringList diff_halfMatchI( const QString &longtext, const QString &shorttext, int i ); - /** + /** * Reduce the number of edits by eliminating semantically trivial equalities. * @param diffs LinkedList of Diff objects. */ - public: - void diff_cleanupSemantic(QList &diffs); +public: + void diff_cleanupSemantic( QList< Diff > &diffs ); - /** + /** * Look for single edits surrounded on both sides by equalities * which can be shifted sideways to align the edit to a word boundary. * e.g: The cat came. -> The cat came. * @param diffs LinkedList of Diff objects. */ - public: - void diff_cleanupSemanticLossless(QList &diffs); +public: + void diff_cleanupSemanticLossless( QList< Diff > &diffs ); - /** + /** * Given two strings, compute a score representing whether the internal * boundary falls on logical boundaries. * Scores range from 6 (best) to 0 (worst). @@ -358,25 +355,25 @@ class diff_match_patch { * @param two Second string. * @return The score. */ - private: - int diff_cleanupSemanticScore(const QString &one, const QString &two); +private: + int diff_cleanupSemanticScore( const QString &one, const QString &two ); - /** + /** * Reduce the number of edits by eliminating operationally trivial equalities. * @param diffs LinkedList of Diff objects. 
*/ - public: - void diff_cleanupEfficiency(QList &diffs); +public: + void diff_cleanupEfficiency( QList< Diff > &diffs ); - /** + /** * Reorder and merge like edit sections. Merge equalities. * Any edit section can move as long as it doesn't cross an equality. * @param diffs LinkedList of Diff objects. */ - public: - void diff_cleanupMerge(QList &diffs); +public: + void diff_cleanupMerge( QList< Diff > &diffs ); - /** + /** * loc is a location in text1, compute and return the equivalent location in * text2. * e.g. "The cat" vs "The big cat", 1->1, 5->8 @@ -384,43 +381,43 @@ class diff_match_patch { * @param loc Location within text1. * @return Location within text2. */ - public: - int diff_xIndex(const QList &diffs, int loc); +public: + int diff_xIndex( const QList< Diff > &diffs, int loc ); - /** + /** * Convert a Diff list into a pretty HTML report. * @param diffs LinkedList of Diff objects. * @return HTML representation. */ - public: - QString diff_prettyHtml(const QList &diffs); +public: + QString diff_prettyHtml( const QList< Diff > &diffs ); - /** + /** * Compute and return the source text (all equalities and deletions). * @param diffs LinkedList of Diff objects. * @return Source text. */ - public: - QString diff_text1(const QList &diffs); +public: + QString diff_text1( const QList< Diff > &diffs ); - /** + /** * Compute and return the destination text (all equalities and insertions). * @param diffs LinkedList of Diff objects. * @return Destination text. */ - public: - QString diff_text2(const QList &diffs); +public: + QString diff_text2( const QList< Diff > &diffs ); - /** + /** * Compute the Levenshtein distance; the number of inserted, deleted or * substituted characters. * @param diffs LinkedList of Diff objects. * @return Number of changes. 
*/ - public: - int diff_levenshtein(const QList &diffs); +public: + int diff_levenshtein( const QList< Diff > &diffs ); - /** + /** * Crush the diff into an encoded string which describes the operations * required to transform text1 into text2. * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. @@ -428,10 +425,10 @@ class diff_match_patch { * @param diffs Array of diff tuples. * @return Delta text. */ - public: - QString diff_toDelta(const QList &diffs); +public: + QString diff_toDelta( const QList< Diff > &diffs ); - /** + /** * Given the original text1, and an encoded string which describes the * operations required to transform text1 into text2, compute the full diff. * @param text1 Source string for the diff. @@ -439,14 +436,12 @@ class diff_match_patch { * @return Array of diff tuples or null if invalid. * @throws QString If invalid input. */ - public: - QList diff_fromDelta(const QString &text1, const QString &delta); +public: + QList< Diff > diff_fromDelta( const QString &text1, const QString &delta ); + // MATCH FUNCTIONS - // MATCH FUNCTIONS - - - /** + /** * Locate the best instance of 'pattern' in 'text' near 'loc'. * Returns -1 if no match found. * @param text The text to search. @@ -454,10 +449,10 @@ class diff_match_patch { * @param loc The location to search around. * @return Best match index or -1. */ - public: - int match_main(const QString &text, const QString &pattern, int loc); +public: + int match_main( const QString &text, const QString &pattern, int loc ); - /** + /** * Locate the best instance of 'pattern' in 'text' near 'loc' using the * Bitap algorithm. Returns -1 if no match found. * @param text The text to search. @@ -465,10 +460,10 @@ class diff_match_patch { * @param loc The location to search around. * @return Best match index or -1. 
*/ - protected: - int match_bitap(const QString &text, const QString &pattern, int loc); +protected: + int match_bitap( const QString &text, const QString &pattern, int loc ); - /** + /** * Compute and return the score for a match with e errors and x location. * @param e Number of errors in match. * @param x Location of match. @@ -476,50 +471,48 @@ class diff_match_patch { * @param pattern Pattern being sought. * @return Overall score for match (0.0 = good, 1.0 = bad). */ - private: - double match_bitapScore(int e, int x, int loc, const QString &pattern); +private: + double match_bitapScore( int e, int x, int loc, const QString &pattern ); - /** + /** * Initialise the alphabet for the Bitap algorithm. * @param pattern The text to encode. * @return Hash of character locations. */ - protected: - QMap match_alphabet(const QString &pattern); - - - // PATCH FUNCTIONS +protected: + QMap< QChar, int > match_alphabet( const QString &pattern ); + // PATCH FUNCTIONS - /** + /** * Increase the context until it is unique, * but don't let the pattern expand beyond Match_MaxBits. * @param patch The patch to grow. * @param text Source text. */ - protected: - void patch_addContext(Patch &patch, const QString &text); +protected: + void patch_addContext( Patch &patch, const QString &text ); - /** + /** * Compute a list of patches to turn text1 into text2. * A set of diffs will be computed. * @param text1 Old text. * @param text2 New text. * @return LinkedList of Patch objects. */ - public: - QList patch_make(const QString &text1, const QString &text2); +public: + QList< Patch > patch_make( const QString &text1, const QString &text2 ); - /** + /** * Compute a list of patches to turn text1 into text2. * text1 will be derived from the provided diffs. * @param diffs Array of diff tuples for text1 to text2. * @return LinkedList of Patch objects. 
*/ - public: - QList patch_make(const QList &diffs); +public: + QList< Patch > patch_make( const QList< Diff > &diffs ); - /** + /** * Compute a list of patches to turn text1 into text2. * text2 is ignored, diffs are the delta between text1 and text2. * @param text1 Old text. @@ -528,28 +521,28 @@ class diff_match_patch { * @return LinkedList of Patch objects. * @deprecated Prefer patch_make(const QString &text1, const QList &diffs). */ - public: - QList patch_make(const QString &text1, const QString &text2, const QList &diffs); +public: + QList< Patch > patch_make( const QString &text1, const QString &text2, const QList< Diff > &diffs ); - /** + /** * Compute a list of patches to turn text1 into text2. * text2 is not provided, diffs are the delta between text1 and text2. * @param text1 Old text. * @param diffs Array of diff tuples for text1 to text2. * @return LinkedList of Patch objects. */ - public: - QList patch_make(const QString &text1, const QList &diffs); +public: + QList< Patch > patch_make( const QString &text1, const QList< Diff > &diffs ); - /** + /** * Given an array of patches, return another array that is identical. * @param patches Array of patch objects. * @return Array of patch objects. */ - public: - QList patch_deepCopy(QList &patches); +public: + QList< Patch > patch_deepCopy( QList< Patch > &patches ); - /** + /** * Merge a set of patches onto the text. Return a patched text, as well * as an array of true/false values indicating which patches were applied. * @param patches Array of patch objects. @@ -557,58 +550,56 @@ class diff_match_patch { * @return Two element Object array, containing the new text and an array of * boolean values. */ - public: - QPair > patch_apply(QList &patches, const QString &text); +public: + QPair< QString, QVector< bool > > patch_apply( QList< Patch > &patches, const QString &text ); - /** + /** * Add some padding on text start and end so that edges can match something. 
* Intended to be called only from within patch_apply. * @param patches Array of patch objects. * @return The padding string added to each side. */ - public: - QString patch_addPadding(QList &patches); +public: + QString patch_addPadding( QList< Patch > &patches ); - /** + /** * Look through the patches and break up any which are longer than the * maximum limit of the match algorithm. * Intended to be called only from within patch_apply. * @param patches LinkedList of Patch objects. */ - public: - void patch_splitMax(QList &patches); +public: + void patch_splitMax( QList< Patch > &patches ); - /** + /** * Take a list of patches and return a textual representation. * @param patches List of Patch objects. * @return Text representation of patches. */ - public: - QString patch_toText(const QList &patches); +public: + QString patch_toText( const QList< Patch > &patches ); - /** + /** * Parse a textual representation of patches and return a List of Patch * objects. * @param textline Text representation of patches. * @return List of Patch objects. * @throws QString If invalid input. */ - public: - QList patch_fromText(const QString &textline); +public: + QList< Patch > patch_fromText( const QString &textline ); - /** + /** * A safer version of QString.mid(pos). This one returns "" instead of * null when the postion equals the string length. * @param str String to take a substring from. * @param pos Position to start the substring from. * @return Substring. */ - private: - static inline QString safeMid(const QString &str, int pos) { - return (pos == str.length()) ? QString("") : str.mid(pos); - } +private: + static inline QString safeMid( const QString &str, int pos ) { return ( pos == str.length() ) ? QString( "" ) : str.mid( pos ); } - /** + /** * A safer version of QString.mid(pos, len). This one returns "" instead of * null when the postion equals the string length. * @param str String to take a substring from. 
@@ -616,10 +607,8 @@ class diff_match_patch { * @param len Length of substring. * @return Substring. */ - private: - static inline QString safeMid(const QString &str, int pos, int len) { - return (pos == str.length()) ? QString("") : str.mid(pos, len); - } +private: + static inline QString safeMid( const QString &str, int pos, int len ) { return ( pos == str.length() ) ? QString( "" ) : str.mid( pos, len ); } }; -#endif // DIFF_MATCH_PATCH_H +#endif // DIFF_MATCH_PATCH_H diff --git a/cpp/diff_match_patch_test.cpp b/cpp/diff_match_patch_test.cpp index f75b1cd0..0d0a39c3 100644 --- a/cpp/diff_match_patch_test.cpp +++ b/cpp/diff_match_patch_test.cpp @@ -21,419 +21,445 @@ #include "diff_match_patch.h" #include "diff_match_patch_test.h" -int main(int argc, char **argv) { - diff_match_patch_test dmp_test; - qDebug("Starting diff_match_patch unit tests."); - dmp_test.run_all_tests(); - qDebug("Done."); - return 0; - Q_UNUSED(argc) - Q_UNUSED(argv) +int main( int argc, char **argv ) +{ + diff_match_patch_test dmp_test; + qDebug( "Starting diff_match_patch unit tests." ); + dmp_test.run_all_tests(); + qDebug( "Done." 
); + return 0; + Q_UNUSED( argc ) + Q_UNUSED( argv ) } - -diff_match_patch_test::diff_match_patch_test() { +diff_match_patch_test::diff_match_patch_test() +{ } -void diff_match_patch_test::run_all_tests() { - QTime t; - t.start(); - try { - testDiffCommonPrefix(); - testDiffCommonSuffix(); - testDiffCommonOverlap(); - testDiffHalfmatch(); - testDiffLinesToChars(); - testDiffCharsToLines(); - testDiffCleanupMerge(); - testDiffCleanupSemanticLossless(); - testDiffCleanupSemantic(); - testDiffCleanupEfficiency(); - testDiffPrettyHtml(); - testDiffText(); - testDiffDelta(); - testDiffXIndex(); - testDiffLevenshtein(); - testDiffBisect(); - testDiffMain(); - - testMatchAlphabet(); - testMatchBitap(); - testMatchMain(); - - testPatchObj(); - testPatchFromText(); - testPatchToText(); - testPatchAddContext(); - testPatchMake(); - testPatchSplitMax(); - testPatchAddPadding(); - testPatchApply(); - qDebug("All tests passed."); - } catch (QString strCase) { - qDebug("Test failed: %s", qPrintable(strCase)); - } - qDebug("Total time: %d ms", t.elapsed()); +void diff_match_patch_test::run_all_tests() +{ + QTime t; + t.start(); + try + { + testDiffCommonPrefix(); + testDiffCommonSuffix(); + testDiffCommonOverlap(); + testDiffHalfmatch(); + testDiffLinesToChars(); + testDiffCharsToLines(); + testDiffCleanupMerge(); + testDiffCleanupSemanticLossless(); + testDiffCleanupSemantic(); + testDiffCleanupEfficiency(); + testDiffPrettyHtml(); + testDiffText(); + testDiffDelta(); + testDiffXIndex(); + testDiffLevenshtein(); + testDiffBisect(); + testDiffMain(); + + testMatchAlphabet(); + testMatchBitap(); + testMatchMain(); + + testPatchObj(); + testPatchFromText(); + testPatchToText(); + testPatchAddContext(); + testPatchMake(); + testPatchSplitMax(); + testPatchAddPadding(); + testPatchApply(); + qDebug( "All tests passed." 
); + } + catch ( QString strCase ) + { + qDebug( "Test failed: %s", qPrintable( strCase ) ); + } + qDebug( "Total time: %d ms", t.elapsed() ); } // DIFF TEST FUNCTIONS -void diff_match_patch_test::testDiffCommonPrefix() { - // Detect any common prefix. - assertEquals("diff_commonPrefix: Null case.", 0, dmp.diff_commonPrefix("abc", "xyz")); +void diff_match_patch_test::testDiffCommonPrefix() +{ + // Detect any common prefix. + assertEquals( "diff_commonPrefix: Null case.", 0, dmp.diff_commonPrefix( "abc", "xyz" ) ); - assertEquals("diff_commonPrefix: Non-null case.", 4, dmp.diff_commonPrefix("1234abcdef", "1234xyz")); + assertEquals( "diff_commonPrefix: Non-null case.", 4, dmp.diff_commonPrefix( "1234abcdef", "1234xyz" ) ); - assertEquals("diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix("1234", "1234xyz")); + assertEquals( "diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix( "1234", "1234xyz" ) ); } -void diff_match_patch_test::testDiffCommonSuffix() { - // Detect any common suffix. - assertEquals("diff_commonSuffix: Null case.", 0, dmp.diff_commonSuffix("abc", "xyz")); +void diff_match_patch_test::testDiffCommonSuffix() +{ + // Detect any common suffix. + assertEquals( "diff_commonSuffix: Null case.", 0, dmp.diff_commonSuffix( "abc", "xyz" ) ); - assertEquals("diff_commonSuffix: Non-null case.", 4, dmp.diff_commonSuffix("abcdef1234", "xyz1234")); + assertEquals( "diff_commonSuffix: Non-null case.", 4, dmp.diff_commonSuffix( "abcdef1234", "xyz1234" ) ); - assertEquals("diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix("1234", "xyz1234")); + assertEquals( "diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix( "1234", "xyz1234" ) ); } -void diff_match_patch_test::testDiffCommonOverlap() { - // Detect any suffix/prefix overlap. - assertEquals("diff_commonOverlap: Null case.", 0, dmp.diff_commonOverlap("", "abcd")); +void diff_match_patch_test::testDiffCommonOverlap() +{ + // Detect any suffix/prefix overlap. 
+ assertEquals( "diff_commonOverlap: Null case.", 0, dmp.diff_commonOverlap( "", "abcd" ) ); - assertEquals("diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap("abc", "abcd")); + assertEquals( "diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap( "abc", "abcd" ) ); - assertEquals("diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap("123456", "abcd")); + assertEquals( "diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap( "123456", "abcd" ) ); - assertEquals("diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap("123456xxx", "xxxabcd")); + assertEquals( "diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap( "123456xxx", "xxxabcd" ) ); - // Some overly clever languages (C#) may treat ligatures as equal to their - // component letters. E.g. U+FB01 == 'fi' - assertEquals("diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap("fi", QString::fromWCharArray((const wchar_t*) L"\ufb01i", 2))); + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + assertEquals( "diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap( "fi", QString::fromWCharArray( (const wchar_t *)L"\ufb01i", 2 ) ) ); } -void diff_match_patch_test::testDiffHalfmatch() { - // Detect a halfmatch. - dmp.Diff_Timeout = 1; - assertEmpty("diff_halfMatch: No match #1.", dmp.diff_halfMatch("1234567890", "abcdef")); +void diff_match_patch_test::testDiffHalfmatch() +{ + // Detect a halfmatch. 
+ dmp.Diff_Timeout = 1; + assertEmpty( "diff_halfMatch: No match #1.", dmp.diff_halfMatch( "1234567890", "abcdef" ) ); - assertEmpty("diff_halfMatch: No match #2.", dmp.diff_halfMatch("12345", "23")); + assertEmpty( "diff_halfMatch: No match #2.", dmp.diff_halfMatch( "12345", "23" ) ); - assertEquals("diff_halfMatch: Single Match #1.", QString("12,90,a,z,345678").split(","), dmp.diff_halfMatch("1234567890", "a345678z")); + assertEquals( "diff_halfMatch: Single Match #1.", QString( "12,90,a,z,345678" ).split( "," ), dmp.diff_halfMatch( "1234567890", "a345678z" ) ); - assertEquals("diff_halfMatch: Single Match #2.", QString("a,z,12,90,345678").split(","), dmp.diff_halfMatch("a345678z", "1234567890")); + assertEquals( "diff_halfMatch: Single Match #2.", QString( "a,z,12,90,345678" ).split( "," ), dmp.diff_halfMatch( "a345678z", "1234567890" ) ); - assertEquals("diff_halfMatch: Single Match #3.", QString("abc,z,1234,0,56789").split(","), dmp.diff_halfMatch("abc56789z", "1234567890")); + assertEquals( "diff_halfMatch: Single Match #3.", QString( "abc,z,1234,0,56789" ).split( "," ), dmp.diff_halfMatch( "abc56789z", "1234567890" ) ); - assertEquals("diff_halfMatch: Single Match #4.", QString("a,xyz,1,7890,23456").split(","), dmp.diff_halfMatch("a23456xyz", "1234567890")); + assertEquals( "diff_halfMatch: Single Match #4.", QString( "a,xyz,1,7890,23456" ).split( "," ), dmp.diff_halfMatch( "a23456xyz", "1234567890" ) ); - assertEquals("diff_halfMatch: Multiple Matches #1.", QString("12123,123121,a,z,1234123451234").split(","), dmp.diff_halfMatch("121231234123451234123121", "a1234123451234z")); + assertEquals( "diff_halfMatch: Multiple Matches #1.", QString( "12123,123121,a,z,1234123451234" ).split( "," ), dmp.diff_halfMatch( "121231234123451234123121", "a1234123451234z" ) ); - assertEquals("diff_halfMatch: Multiple Matches #2.", QString(",-=-=-=-=-=,x,,x-=-=-=-=-=-=-=").split(","), dmp.diff_halfMatch("x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=")); + assertEquals( 
"diff_halfMatch: Multiple Matches #2.", QString( ",-=-=-=-=-=,x,,x-=-=-=-=-=-=-=" ).split( "," ), dmp.diff_halfMatch( "x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=" ) ); - assertEquals("diff_halfMatch: Multiple Matches #3.", QString("-=-=-=-=-=,,,y,-=-=-=-=-=-=-=y").split(","), dmp.diff_halfMatch("-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")); + assertEquals( "diff_halfMatch: Multiple Matches #3.", QString( "-=-=-=-=-=,,,y,-=-=-=-=-=-=-=y" ).split( "," ), dmp.diff_halfMatch( "-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy" ) ); - // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy - assertEquals("diff_halfMatch: Non-optimal halfmatch.", QString("qHillo,w,x,Hulloy,HelloHe").split(","), dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + assertEquals( "diff_halfMatch: Non-optimal halfmatch.", QString( "qHillo,w,x,Hulloy,HelloHe" ).split( "," ), dmp.diff_halfMatch( "qHilloHelloHew", "xHelloHeHulloy" ) ); - dmp.Diff_Timeout = 0; - assertEmpty("diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + dmp.Diff_Timeout = 0; + assertEmpty( "diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch( "qHilloHelloHew", "xHelloHeHulloy" ) ); } -void diff_match_patch_test::testDiffLinesToChars() { - // Convert lines down to characters. 
- QStringList tmpVector; - QList tmpVarList; - tmpVector.append(""); - tmpVector.append("alpha\n"); - tmpVector.append("beta\n"); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)1)); //(("\u0001\u0002\u0001")); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)2) + QChar((ushort)1) + QChar((ushort)2)); // (("\u0002\u0001\u0002")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")); - - tmpVector.clear(); - tmpVarList.clear(); - tmpVector.append(""); - tmpVector.append("alpha\r\n"); - tmpVector.append("beta\r\n"); - tmpVector.append("\r\n"); - tmpVarList << QVariant::fromValue(QString("")); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)3) + QChar((ushort)3)); // (("\u0001\u0002\u0003\u0003")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n")); - - tmpVector.clear(); - tmpVarList.clear(); - tmpVector.append(""); - tmpVector.append("a"); - tmpVector.append("b"); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1)); // (("\u0001")); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)2)); // (("\u0002")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("a", "b")); - - // More than 256 to reveal any 8-bit limitations. 
- int n = 300; - tmpVector.clear(); - tmpVarList.clear(); - QString lines; - QString chars; - for (int x = 1; x < n + 1; x++) { - tmpVector.append(QString::number(x) + "\n"); - lines += QString::number(x) + "\n"; - chars += QChar(static_cast(x)); - } - assertEquals("diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); - assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.prepend(""); - tmpVarList << QVariant::fromValue(chars); - tmpVarList << QVariant::fromValue(QString("")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars(lines, "")); +void diff_match_patch_test::testDiffLinesToChars() +{ + // Convert lines down to characters. + QStringList tmpVector; + QList< QVariant > tmpVarList; + tmpVector.append( "" ); + tmpVector.append( "alpha\n" ); + tmpVector.append( "beta\n" ); + tmpVarList << QVariant::fromValue( QString() + QChar( (ushort)1 ) + QChar( (ushort)2 ) + QChar( (ushort)1 ) ); //(("\u0001\u0002\u0001")); + tmpVarList << QVariant::fromValue( QString() + QChar( (ushort)2 ) + QChar( (ushort)1 ) + QChar( (ushort)2 ) ); // (("\u0002\u0001\u0002")); + tmpVarList << QVariant::fromValue( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n" ) ); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.append( "" ); + tmpVector.append( "alpha\r\n" ); + tmpVector.append( "beta\r\n" ); + tmpVector.append( "\r\n" ); + tmpVarList << QVariant::fromValue( QString( "" ) ); + tmpVarList << QVariant::fromValue( QString() + QChar( (ushort)1 ) + QChar( (ushort)2 ) + QChar( (ushort)3 ) + QChar( (ushort)3 ) ); // (("\u0001\u0002\u0003\u0003")); + tmpVarList << QVariant::fromValue( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "", "alpha\r\nbeta\r\n\r\n\r\n" ) ); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.append( "" 
); + tmpVector.append( "a" ); + tmpVector.append( "b" ); + tmpVarList << QVariant::fromValue( QString() + QChar( (ushort)1 ) ); // (("\u0001")); + tmpVarList << QVariant::fromValue( QString() + QChar( (ushort)2 ) ); // (("\u0002")); + tmpVarList << QVariant::fromValue( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "a", "b" ) ); + + // More than 256 to reveal any 8-bit limitations. + int n = 300; + tmpVector.clear(); + tmpVarList.clear(); + QString lines; + QString chars; + for ( int x = 1; x < n + 1; x++ ) + { + tmpVector.append( QString::number( x ) + "\n" ); + lines += QString::number( x ) + "\n"; + chars += QChar( static_cast< ushort >( x ) ); + } + assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); + assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); + tmpVector.prepend( "" ); + tmpVarList << QVariant::fromValue( chars ); + tmpVarList << QVariant::fromValue( QString( "" ) ); + tmpVarList << QVariant::fromValue( tmpVector ); + assertEquals( "diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars( lines, "" ) ); } -void diff_match_patch_test::testDiffCharsToLines() { - // First check that Diff equality works. - assertTrue("diff_charsToLines:", Diff(EQUAL, "a") == Diff(EQUAL, "a")); - - assertEquals("diff_charsToLines:", Diff(EQUAL, "a"), Diff(EQUAL, "a")); - - // Convert chars up to lines. 
- QList diffs; - diffs << Diff(EQUAL, QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)1)); // ("\u0001\u0002\u0001"); - diffs << Diff(INSERT, QString() + QChar((ushort)2) + QChar((ushort)1) + QChar((ushort)2)); // ("\u0002\u0001\u0002"); - QStringList tmpVector; - tmpVector.append(""); - tmpVector.append("alpha\n"); - tmpVector.append("beta\n"); - dmp.diff_charsToLines(diffs, tmpVector); - assertEquals("diff_charsToLines:", diffList(Diff(EQUAL, "alpha\nbeta\nalpha\n"), Diff(INSERT, "beta\nalpha\nbeta\n")), diffs); - - // More than 256 to reveal any 8-bit limitations. - int n = 300; - tmpVector.clear(); - QList tmpVarList; - QString lines; - QString chars; - for (int x = 1; x < n + 1; x++) { - tmpVector.append(QString::number(x) + "\n"); - lines += QString::number(x) + "\n"; - chars += QChar(static_cast(x)); - } - assertEquals("diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); - assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.prepend(""); - diffs = diffList(Diff(DELETE, chars)); - dmp.diff_charsToLines(diffs, tmpVector); - assertEquals("diff_charsToLines: More than 256.", diffList(Diff(DELETE, lines)), diffs); +void diff_match_patch_test::testDiffCharsToLines() +{ + // First check that Diff equality works. + assertTrue( "diff_charsToLines:", Diff( EQUAL, "a" ) == Diff( EQUAL, "a" ) ); + + assertEquals( "diff_charsToLines:", Diff( EQUAL, "a" ), Diff( EQUAL, "a" ) ); + + // Convert chars up to lines. 
+ QList< Diff > diffs; + diffs << Diff( EQUAL, QString() + QChar( (ushort)1 ) + QChar( (ushort)2 ) + QChar( (ushort)1 ) ); // ("\u0001\u0002\u0001"); + diffs << Diff( INSERT, QString() + QChar( (ushort)2 ) + QChar( (ushort)1 ) + QChar( (ushort)2 ) ); // ("\u0002\u0001\u0002"); + QStringList tmpVector; + tmpVector.append( "" ); + tmpVector.append( "alpha\n" ); + tmpVector.append( "beta\n" ); + dmp.diff_charsToLines( diffs, tmpVector ); + assertEquals( "diff_charsToLines:", diffList( Diff( EQUAL, "alpha\nbeta\nalpha\n" ), Diff( INSERT, "beta\nalpha\nbeta\n" ) ), diffs ); + + // More than 256 to reveal any 8-bit limitations. + int n = 300; + tmpVector.clear(); + QList< QVariant > tmpVarList; + QString lines; + QString chars; + for ( int x = 1; x < n + 1; x++ ) + { + tmpVector.append( QString::number( x ) + "\n" ); + lines += QString::number( x ) + "\n"; + chars += QChar( static_cast< ushort >( x ) ); + } + assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); + assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); + tmpVector.prepend( "" ); + diffs = diffList( Diff( DELETE, chars ) ); + dmp.diff_charsToLines( diffs, tmpVector ); + assertEquals( "diff_charsToLines: More than 256.", diffList( Diff( DELETE, lines ) ), diffs ); } -void diff_match_patch_test::testDiffCleanupMerge() { - // Cleanup a messy diff. - QList diffs; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Null case.", diffList(), diffs); +void diff_match_patch_test::testDiffCleanupMerge() +{ + // Cleanup a messy diff. 
+ QList< Diff > diffs; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Null case.", diffList(), diffs ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: No change case.", diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")), diffs); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: No change case.", diffList( Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "a"), Diff(EQUAL, "b"), Diff(EQUAL, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge equalities.", diffList(Diff(EQUAL, "abc")), diffs); + diffs = diffList( Diff( EQUAL, "a" ), Diff( EQUAL, "b" ), Diff( EQUAL, "c" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge equalities.", diffList( Diff( EQUAL, "abc" ) ), diffs ); - diffs = diffList(Diff(DELETE, "a"), Diff(DELETE, "b"), Diff(DELETE, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge deletions.", diffList(Diff(DELETE, "abc")), diffs); + diffs = diffList( Diff( DELETE, "a" ), Diff( DELETE, "b" ), Diff( DELETE, "c" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge deletions.", diffList( Diff( DELETE, "abc" ) ), diffs ); - diffs = diffList(Diff(INSERT, "a"), Diff(INSERT, "b"), Diff(INSERT, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge insertions.", diffList(Diff(INSERT, "abc")), diffs); + diffs = diffList( Diff( INSERT, "a" ), Diff( INSERT, "b" ), Diff( INSERT, "c" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge insertions.", diffList( Diff( INSERT, "abc" ) ), diffs ); - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "b"), Diff(DELETE, "c"), Diff(INSERT, "d"), Diff(EQUAL, "e"), Diff(EQUAL, 
"f")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge interweave.", diffList(Diff(DELETE, "ac"), Diff(INSERT, "bd"), Diff(EQUAL, "ef")), diffs); + diffs = diffList( Diff( DELETE, "a" ), Diff( INSERT, "b" ), Diff( DELETE, "c" ), Diff( INSERT, "d" ), Diff( EQUAL, "e" ), Diff( EQUAL, "f" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge interweave.", diffList( Diff( DELETE, "ac" ), Diff( INSERT, "bd" ), Diff( EQUAL, "ef" ) ), diffs ); - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "abc"), Diff(DELETE, "dc")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Prefix and suffix detection.", diffList(Diff(EQUAL, "a"), Diff(DELETE, "d"), Diff(INSERT, "b"), Diff(EQUAL, "c")), diffs); + diffs = diffList( Diff( DELETE, "a" ), Diff( INSERT, "abc" ), Diff( DELETE, "dc" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection.", diffList( Diff( EQUAL, "a" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "c" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "x"), Diff(DELETE, "a"), Diff(INSERT, "abc"), Diff(DELETE, "dc"), Diff(EQUAL, "y")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Prefix and suffix detection with equalities.", diffList(Diff(EQUAL, "xa"), Diff(DELETE, "d"), Diff(INSERT, "b"), Diff(EQUAL, "cy")), diffs); + diffs = diffList( Diff( EQUAL, "x" ), Diff( DELETE, "a" ), Diff( INSERT, "abc" ), Diff( DELETE, "dc" ), Diff( EQUAL, "y" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection with equalities.", diffList( Diff( EQUAL, "xa" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "cy" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "a"), Diff(INSERT, "ba"), Diff(EQUAL, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit left.", diffList(Diff(INSERT, "ab"), Diff(EQUAL, "ac")), diffs); + diffs = diffList( Diff( EQUAL, "a" ), Diff( 
INSERT, "ba" ), Diff( EQUAL, "c" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left.", diffList( Diff( INSERT, "ab" ), Diff( EQUAL, "ac" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "c"), Diff(INSERT, "ab"), Diff(EQUAL, "a")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit right.", diffList(Diff(EQUAL, "ca"), Diff(INSERT, "ba")), diffs); + diffs = diffList( Diff( EQUAL, "c" ), Diff( INSERT, "ab" ), Diff( EQUAL, "a" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right.", diffList( Diff( EQUAL, "ca" ), Diff( INSERT, "ba" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(EQUAL, "c"), Diff(DELETE, "ac"), Diff(EQUAL, "x")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit left recursive.", diffList(Diff(DELETE, "abc"), Diff(EQUAL, "acx")), diffs); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( EQUAL, "c" ), Diff( DELETE, "ac" ), Diff( EQUAL, "x" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left recursive.", diffList( Diff( DELETE, "abc" ), Diff( EQUAL, "acx" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "x"), Diff(DELETE, "ca"), Diff(EQUAL, "c"), Diff(DELETE, "b"), Diff(EQUAL, "a")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit right recursive.", diffList(Diff(EQUAL, "xca"), Diff(DELETE, "cba")), diffs); + diffs = diffList( Diff( EQUAL, "x" ), Diff( DELETE, "ca" ), Diff( EQUAL, "c" ), Diff( DELETE, "b" ), Diff( EQUAL, "a" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right recursive.", diffList( Diff( EQUAL, "xca" ), Diff( DELETE, "cba" ) ), diffs ); } -void diff_match_patch_test::testDiffCleanupSemanticLossless() { - // Slide diffs to match logical boundaries. 
- QList diffs = diffList(); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Null case.", diffList(), diffs); +void diff_match_patch_test::testDiffCleanupSemanticLossless() +{ + // Slide diffs to match logical boundaries. + QList< Diff > diffs = diffList(); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Null case.", diffList(), diffs ); - diffs = diffList(Diff(EQUAL, "AAA\r\n\r\nBBB"), Diff(INSERT, "\r\nDDD\r\n\r\nBBB"), Diff(EQUAL, "\r\nEEE")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemanticLossless: Blank lines.", diffList(Diff(EQUAL, "AAA\r\n\r\n"), Diff(INSERT, "BBB\r\nDDD\r\n\r\n"), Diff(EQUAL, "BBB\r\nEEE")), diffs); + diffs = diffList( Diff( EQUAL, "AAA\r\n\r\nBBB" ), Diff( INSERT, "\r\nDDD\r\n\r\nBBB" ), Diff( EQUAL, "\r\nEEE" ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemanticLossless: Blank lines.", diffList( Diff( EQUAL, "AAA\r\n\r\n" ), Diff( INSERT, "BBB\r\nDDD\r\n\r\n" ), Diff( EQUAL, "BBB\r\nEEE" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "AAA\r\nBBB"), Diff(INSERT, " DDD\r\nBBB"), Diff(EQUAL, " EEE")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemanticLossless: Line boundaries.", diffList(Diff(EQUAL, "AAA\r\n"), Diff(INSERT, "BBB DDD\r\n"), Diff(EQUAL, "BBB EEE")), diffs); + diffs = diffList( Diff( EQUAL, "AAA\r\nBBB" ), Diff( INSERT, " DDD\r\nBBB" ), Diff( EQUAL, " EEE" ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemanticLossless: Line boundaries.", diffList( Diff( EQUAL, "AAA\r\n" ), Diff( INSERT, "BBB DDD\r\n" ), Diff( EQUAL, "BBB EEE" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "The c"), Diff(INSERT, "ow and the c"), Diff(EQUAL, "at.")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Word boundaries.", diffList(Diff(EQUAL, "The "), Diff(INSERT, "cow and the "), Diff(EQUAL, "cat.")), diffs); + diffs = diffList( 
Diff( EQUAL, "The c" ), Diff( INSERT, "ow and the c" ), Diff( EQUAL, "at." ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", diffList( Diff( EQUAL, "The " ), Diff( INSERT, "cow and the " ), Diff( EQUAL, "cat." ) ), diffs ); - diffs = diffList(Diff(EQUAL, "The-c"), Diff(INSERT, "ow-and-the-c"), Diff(EQUAL, "at.")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Alphanumeric boundaries.", diffList(Diff(EQUAL, "The-"), Diff(INSERT, "cow-and-the-"), Diff(EQUAL, "cat.")), diffs); + diffs = diffList( Diff( EQUAL, "The-c" ), Diff( INSERT, "ow-and-the-c" ), Diff( EQUAL, "at." ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Alphanumeric boundaries.", diffList( Diff( EQUAL, "The-" ), Diff( INSERT, "cow-and-the-" ), Diff( EQUAL, "cat." ) ), diffs ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "a"), Diff(EQUAL, "ax")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Hitting the start.", diffList(Diff(DELETE, "a"), Diff(EQUAL, "aax")), diffs); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "a" ), Diff( EQUAL, "ax" ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the start.", diffList( Diff( DELETE, "a" ), Diff( EQUAL, "aax" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "xa"), Diff(DELETE, "a"), Diff(EQUAL, "a")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Hitting the end.", diffList(Diff(EQUAL, "xaa"), Diff(DELETE, "a")), diffs); + diffs = diffList( Diff( EQUAL, "xa" ), Diff( DELETE, "a" ), Diff( EQUAL, "a" ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the end.", diffList( Diff( EQUAL, "xaa" ), Diff( DELETE, "a" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "The xxx. The "), Diff(INSERT, "zzz. 
The "), Diff(EQUAL, "yyy.")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Sentence boundaries.", diffList(Diff(EQUAL, "The xxx."), Diff(INSERT, " The zzz."), Diff(EQUAL, " The yyy.")), diffs); + diffs = diffList( Diff( EQUAL, "The xxx. The " ), Diff( INSERT, "zzz. The " ), Diff( EQUAL, "yyy." ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Sentence boundaries.", diffList( Diff( EQUAL, "The xxx." ), Diff( INSERT, " The zzz." ), Diff( EQUAL, " The yyy." ) ), diffs ); } -void diff_match_patch_test::testDiffCleanupSemantic() { - // Cleanup semantically trivial equalities. - QList diffs = diffList(); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Null case.", diffList(), diffs); +void diff_match_patch_test::testDiffCleanupSemantic() +{ + // Cleanup semantically trivial equalities. + QList< Diff > diffs = diffList(); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Null case.", diffList(), diffs ); - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), Diff(DELETE, "e")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No elimination #1.", diffList(Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), Diff(DELETE, "e")), diffs); + diffs = diffList( Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #1.", diffList( Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) ), diffs ); - diffs = diffList(Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No elimination #2.", diffList(Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")), diffs); + diffs = diffList( Diff( DELETE, "abc" ), Diff( INSERT, 
"ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #2.", diffList( Diff( DELETE, "abc" ), Diff( INSERT, "ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) ), diffs ); - diffs = diffList(Diff(DELETE, "a"), Diff(EQUAL, "b"), Diff(DELETE, "c")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Simple elimination.", diffList(Diff(DELETE, "abc"), Diff(INSERT, "b")), diffs); + diffs = diffList( Diff( DELETE, "a" ), Diff( EQUAL, "b" ), Diff( DELETE, "c" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Simple elimination.", diffList( Diff( DELETE, "abc" ), Diff( INSERT, "b" ) ), diffs ); - diffs = diffList(Diff(DELETE, "ab"), Diff(EQUAL, "cd"), Diff(DELETE, "e"), Diff(EQUAL, "f"), Diff(INSERT, "g")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Backpass elimination.", diffList(Diff(DELETE, "abcdef"), Diff(INSERT, "cdfg")), diffs); + diffs = diffList( Diff( DELETE, "ab" ), Diff( EQUAL, "cd" ), Diff( DELETE, "e" ), Diff( EQUAL, "f" ), Diff( INSERT, "g" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Backpass elimination.", diffList( Diff( DELETE, "abcdef" ), Diff( INSERT, "cdfg" ) ), diffs ); - diffs = diffList(Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2"), Diff(EQUAL, "_"), Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Multiple elimination.", diffList(Diff(DELETE, "AB_AB"), Diff(INSERT, "1A2_1A2")), diffs); + diffs = diffList( Diff( INSERT, "1" ), Diff( EQUAL, "A" ), Diff( DELETE, "B" ), Diff( INSERT, "2" ), Diff( EQUAL, "_" ), Diff( INSERT, "1" ), Diff( EQUAL, "A" ), Diff( DELETE, "B" ), Diff( INSERT, "2" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Multiple elimination.", diffList( Diff( 
DELETE, "AB_AB" ), Diff( INSERT, "1A2_1A2" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "The c"), Diff(DELETE, "ow and the c"), Diff(EQUAL, "at.")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Word boundaries.", diffList(Diff(EQUAL, "The "), Diff(DELETE, "cow and the "), Diff(EQUAL, "cat.")), diffs); + diffs = diffList( Diff( EQUAL, "The c" ), Diff( DELETE, "ow and the c" ), Diff( EQUAL, "at." ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", diffList( Diff( EQUAL, "The " ), Diff( DELETE, "cow and the " ), Diff( EQUAL, "cat." ) ), diffs ); - diffs = diffList(Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No overlap elimination.", diffList(Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")), diffs); + diffs = diffList( Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No overlap elimination.", diffList( Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) ), diffs ); - diffs = diffList(Diff(DELETE, "abcxxx"), Diff(INSERT, "xxxdef")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Overlap elimination.", diffList(Diff(DELETE, "abc"), Diff(EQUAL, "xxx"), Diff(INSERT, "def")), diffs); + diffs = diffList( Diff( DELETE, "abcxxx" ), Diff( INSERT, "xxxdef" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Overlap elimination.", diffList( Diff( DELETE, "abc" ), Diff( EQUAL, "xxx" ), Diff( INSERT, "def" ) ), diffs ); - diffs = diffList(Diff(DELETE, "xxxabc"), Diff(INSERT, "defxxx")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Reverse overlap elimination.", diffList(Diff(INSERT, "def"), Diff(EQUAL, "xxx"), Diff(DELETE, "abc")), diffs); + diffs = diffList( Diff( DELETE, "xxxabc" ), Diff( INSERT, "defxxx" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( 
"diff_cleanupSemantic: Reverse overlap elimination.", diffList( Diff( INSERT, "def" ), Diff( EQUAL, "xxx" ), Diff( DELETE, "abc" ) ), diffs ); - diffs = diffList(Diff(DELETE, "abcd1212"), Diff(INSERT, "1212efghi"), Diff(EQUAL, "----"), Diff(DELETE, "A3"), Diff(INSERT, "3BC")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Two overlap eliminations.", diffList(Diff(DELETE, "abcd"), Diff(EQUAL, "1212"), Diff(INSERT, "efghi"), Diff(EQUAL, "----"), Diff(DELETE, "A"), Diff(EQUAL, "3"), Diff(INSERT, "BC")), diffs); + diffs = diffList( Diff( DELETE, "abcd1212" ), Diff( INSERT, "1212efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "A3" ), Diff( INSERT, "3BC" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Two overlap eliminations.", diffList( Diff( DELETE, "abcd" ), Diff( EQUAL, "1212" ), Diff( INSERT, "efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "A" ), Diff( EQUAL, "3" ), Diff( INSERT, "BC" ) ), diffs ); } -void diff_match_patch_test::testDiffCleanupEfficiency() { - // Cleanup operationally trivial equalities. 
- dmp.Diff_EditCost = 4; - QList diffs = diffList(); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Null case.", diffList(), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: No elimination.", diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Four-edit elimination.", diffList(Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xyz34")), diffs); - - diffs = diffList(Diff(INSERT, "12"), Diff(EQUAL, "x"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Three-edit elimination.", diffList(Diff(DELETE, "xcd"), Diff(INSERT, "12x34")), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xy"), Diff(INSERT, "34"), Diff(EQUAL, "z"), Diff(DELETE, "cd"), Diff(INSERT, "56")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Backpass elimination.", diffList(Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xy34z56")), diffs); - - dmp.Diff_EditCost = 5; - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: High cost elimination.", diffList(Diff(DELETE, "abwxyzcd"), Diff(INSERT, "12wxyz34")), diffs); - dmp.Diff_EditCost = 4; +void diff_match_patch_test::testDiffCleanupEfficiency() +{ + // Cleanup operationally trivial equalities. 
+ dmp.Diff_EditCost = 4; + QList< Diff > diffs = diffList(); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Null case.", diffList(), diffs ); + + diffs = diffList( Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) ); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: No elimination.", diffList( Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) ), diffs ); + + diffs = diffList( Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "xyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) ); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Four-edit elimination.", diffList( Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xyz34" ) ), diffs ); + + diffs = diffList( Diff( INSERT, "12" ), Diff( EQUAL, "x" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) ); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Three-edit elimination.", diffList( Diff( DELETE, "xcd" ), Diff( INSERT, "12x34" ) ), diffs ); + + diffs = diffList( Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "xy" ), Diff( INSERT, "34" ), Diff( EQUAL, "z" ), Diff( DELETE, "cd" ), Diff( INSERT, "56" ) ); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Backpass elimination.", diffList( Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xy34z56" ) ), diffs ); + + dmp.Diff_EditCost = 5; + diffs = diffList( Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) ); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: High cost elimination.", diffList( Diff( DELETE, "abwxyzcd" ), Diff( INSERT, "12wxyz34" ) ), diffs ); + dmp.Diff_EditCost = 4; } -void diff_match_patch_test::testDiffPrettyHtml() { - // Pretty print. 
- QList diffs = diffList(Diff(EQUAL, "a\n"), Diff(DELETE, "b"), Diff(INSERT, "c&d")); - assertEquals("diff_prettyHtml:", "
<B>b</B>c&d", dmp.diff_prettyHtml(diffs)); +void diff_match_patch_test::testDiffPrettyHtml() +{ + // Pretty print. + QList< Diff > diffs = diffList( Diff( EQUAL, "a\n" ), Diff( DELETE, "b" ), Diff( INSERT, "c&d" ) ); + assertEquals( "diff_prettyHtml:", "
<B>b</B>c&d", dmp.diff_prettyHtml( diffs ) ); } -void diff_match_patch_test::testDiffText() { - // Compute the source and destination texts. - QList diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, " lazy")); - assertEquals("diff_text1:", "jumps over the lazy", dmp.diff_text1(diffs)); - assertEquals("diff_text2:", "jumped over a lazy", dmp.diff_text2(diffs)); +void diff_match_patch_test::testDiffText() +{ + // Compute the source and destination texts. + QList< Diff > diffs = diffList( Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ) ); + assertEquals( "diff_text1:", "jumps over the lazy", dmp.diff_text1( diffs ) ); + assertEquals( "diff_text2:", "jumped over a lazy", dmp.diff_text2( diffs ) ); } -void diff_match_patch_test::testDiffDelta() { - // Convert a diff into delta string. - QList diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, " lazy"), Diff(INSERT, "old dog")); - QString text1 = dmp.diff_text1(diffs); - assertEquals("diff_text1: Base text.", "jumps over the lazy", text1); +void diff_match_patch_test::testDiffDelta() +{ + // Convert a diff into delta string. 
+ QList< Diff > diffs = diffList( Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ), Diff( INSERT, "old dog" ) ); + QString text1 = dmp.diff_text1( diffs ); + assertEquals( "diff_text1: Base text.", "jumps over the lazy", text1 ); - QString delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta:", "=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta); + QString delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta:", "=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta ); - // Convert delta string into a diff. - assertEquals("diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta(text1, delta)); + // Convert delta string into a diff. + assertEquals( "diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta( text1, delta ) ); - // Generates error (19 < 20). - try { - dmp.diff_fromDelta(text1 + "x", delta); - assertFalse("diff_fromDelta: Too long.", true); - } catch (QString ex) { - // Exception expected. - } + // Generates error (19 < 20). + try + { + dmp.diff_fromDelta( text1 + "x", delta ); + assertFalse( "diff_fromDelta: Too long.", true ); + } + catch ( QString ex ) + { + // Exception expected. + } - // Generates error (19 > 18). - try { - dmp.diff_fromDelta(text1.mid(1), delta); - assertFalse("diff_fromDelta: Too short.", true); - } catch (QString ex) { - // Exception expected. - } + // Generates error (19 > 18). + try + { + dmp.diff_fromDelta( text1.mid( 1 ), delta ); + assertFalse( "diff_fromDelta: Too short.", true ); + } + catch ( QString ex ) + { + // Exception expected. + } - // Generates error (%c3%xy invalid Unicode). - /* This test does not work because QUrl::fromPercentEncoding("%xy") -> "?" + // Generates error (%c3%xy invalid Unicode). + /* This test does not work because QUrl::fromPercentEncoding("%xy") -> "?" 
try { dmp.diff_fromDelta("", "+%c3%xy"); assertFalse("diff_fromDelta: Invalid character.", true); @@ -442,746 +468,815 @@ void diff_match_patch_test::testDiffDelta() { } */ - // Test deltas with special characters. - diffs = diffList(Diff(EQUAL, QString::fromWCharArray((const wchar_t*) L"\u0680 \000 \t %", 7)), Diff(DELETE, QString::fromWCharArray((const wchar_t*) L"\u0681 \001 \n ^", 7)), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\u0682 \002 \\ |", 7))); - text1 = dmp.diff_text1(diffs); - assertEquals("diff_text1: Unicode text.", QString::fromWCharArray((const wchar_t*) L"\u0680 \000 \t %\u0681 \001 \n ^", 14), text1); + // Test deltas with special characters. + diffs = diffList( Diff( EQUAL, QString::fromWCharArray( (const wchar_t *)L"\u0680 \000 \t %", 7 ) ), Diff( DELETE, QString::fromWCharArray( (const wchar_t *)L"\u0681 \001 \n ^", 7 ) ), Diff( INSERT, QString::fromWCharArray( (const wchar_t *)L"\u0682 \002 \\ |", 7 ) ) ); + text1 = dmp.diff_text1( diffs ); + assertEquals( "diff_text1: Unicode text.", QString::fromWCharArray( (const wchar_t *)L"\u0680 \000 \t %\u0681 \001 \n ^", 14 ), text1 ); - delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta: Unicode.", "=7\t-7\t+%DA%82 %02 %5C %7C", delta); + delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta: Unicode.", "=7\t-7\t+%DA%82 %02 %5C %7C", delta ); - assertEquals("diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta(text1, delta)); + assertEquals( "diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta( text1, delta ) ); - // Verify pool of unchanged characters. - diffs = diffList(Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")); - QString text2 = dmp.diff_text2(diffs); - assertEquals("diff_text2: Unchanged characters.", "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2); + // Verify pool of unchanged characters. + diffs = diffList( Diff( INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? 
: @ & = + $ , # " ) ); + QString text2 = dmp.diff_text2( diffs ); + assertEquals( "diff_text2: Unchanged characters.", "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2 ); - delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta: Unchanged characters.", "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta); + delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta: Unchanged characters.", "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta ); - // Convert delta string into a diff. - assertEquals("diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta("", delta)); + // Convert delta string into a diff. + assertEquals( "diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta( "", delta ) ); } -void diff_match_patch_test::testDiffXIndex() { - // Translate a location in text1 to text2. - QList diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")); - assertEquals("diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex(diffs, 2)); +void diff_match_patch_test::testDiffXIndex() +{ + // Translate a location in text1 to text2. 
+ QList< Diff > diffs = diffList( Diff( DELETE, "a" ), Diff( INSERT, "1234" ), Diff( EQUAL, "xyz" ) ); + assertEquals( "diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex( diffs, 2 ) ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "1234"), Diff(EQUAL, "xyz")); - assertEquals("diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex(diffs, 3)); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "1234" ), Diff( EQUAL, "xyz" ) ); + assertEquals( "diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex( diffs, 3 ) ); } -void diff_match_patch_test::testDiffLevenshtein() { - QList diffs = diffList(Diff(DELETE, "abc"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")); - assertEquals("diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein(diffs)); +void diff_match_patch_test::testDiffLevenshtein() +{ + QList< Diff > diffs = diffList( Diff( DELETE, "abc" ), Diff( INSERT, "1234" ), Diff( EQUAL, "xyz" ) ); + assertEquals( "diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein( diffs ) ); - diffs = diffList(Diff(EQUAL, "xyz"), Diff(DELETE, "abc"), Diff(INSERT, "1234")); - assertEquals("diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein(diffs)); + diffs = diffList( Diff( EQUAL, "xyz" ), Diff( DELETE, "abc" ), Diff( INSERT, "1234" ) ); + assertEquals( "diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein( diffs ) ); - diffs = diffList(Diff(DELETE, "abc"), Diff(EQUAL, "xyz"), Diff(INSERT, "1234")); - assertEquals("diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein(diffs)); + diffs = diffList( Diff( DELETE, "abc" ), Diff( EQUAL, "xyz" ), Diff( INSERT, "1234" ) ); + assertEquals( "diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein( diffs ) ); } -void diff_match_patch_test::testDiffBisect() { - // Normal. - QString a = "cat"; - QString b = "map"; - // Since the resulting diff hasn't been normalized, it would be ok if - // the insertion and deletion pairs are swapped. 
- // If the order changes, tweak this test as required. - QList diffs = diffList(Diff(DELETE, "c"), Diff(INSERT, "m"), Diff(EQUAL, "a"), Diff(DELETE, "t"), Diff(INSERT, "p")); - assertEquals("diff_bisect: Normal.", diffs, dmp.diff_bisect(a, b, std::numeric_limits::max())); - - // Timeout. - diffs = diffList(Diff(DELETE, "cat"), Diff(INSERT, "map")); - assertEquals("diff_bisect: Timeout.", diffs, dmp.diff_bisect(a, b, 0)); +void diff_match_patch_test::testDiffBisect() +{ + // Normal. + QString a = "cat"; + QString b = "map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + QList< Diff > diffs = diffList( Diff( DELETE, "c" ), Diff( INSERT, "m" ), Diff( EQUAL, "a" ), Diff( DELETE, "t" ), Diff( INSERT, "p" ) ); + assertEquals( "diff_bisect: Normal.", diffs, dmp.diff_bisect( a, b, std::numeric_limits< clock_t >::max() ) ); + + // Timeout. + diffs = diffList( Diff( DELETE, "cat" ), Diff( INSERT, "map" ) ); + assertEquals( "diff_bisect: Timeout.", diffs, dmp.diff_bisect( a, b, 0 ) ); } -void diff_match_patch_test::testDiffMain() { - // Perform a trivial diff. - QList diffs = diffList(); - assertEquals("diff_main: Null case.", diffs, dmp.diff_main("", "", false)); +void diff_match_patch_test::testDiffMain() +{ + // Perform a trivial diff. 
+ QList< Diff > diffs = diffList(); + assertEquals( "diff_main: Null case.", diffs, dmp.diff_main( "", "", false ) ); - diffs = diffList(Diff(EQUAL, "abc")); - assertEquals("diff_main: Equality.", diffs, dmp.diff_main("abc", "abc", false)); + diffs = diffList( Diff( EQUAL, "abc" ) ); + assertEquals( "diff_main: Equality.", diffs, dmp.diff_main( "abc", "abc", false ) ); - diffs = diffList(Diff(EQUAL, "ab"), Diff(INSERT, "123"), Diff(EQUAL, "c")); - assertEquals("diff_main: Simple insertion.", diffs, dmp.diff_main("abc", "ab123c", false)); + diffs = diffList( Diff( EQUAL, "ab" ), Diff( INSERT, "123" ), Diff( EQUAL, "c" ) ); + assertEquals( "diff_main: Simple insertion.", diffs, dmp.diff_main( "abc", "ab123c", false ) ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "bc")); - assertEquals("diff_main: Simple deletion.", diffs, dmp.diff_main("a123bc", "abc", false)); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "123" ), Diff( EQUAL, "bc" ) ); + assertEquals( "diff_main: Simple deletion.", diffs, dmp.diff_main( "a123bc", "abc", false ) ); - diffs = diffList(Diff(EQUAL, "a"), Diff(INSERT, "123"), Diff(EQUAL, "b"), Diff(INSERT, "456"), Diff(EQUAL, "c")); - assertEquals("diff_main: Two insertions.", diffs, dmp.diff_main("abc", "a123b456c", false)); + diffs = diffList( Diff( EQUAL, "a" ), Diff( INSERT, "123" ), Diff( EQUAL, "b" ), Diff( INSERT, "456" ), Diff( EQUAL, "c" ) ); + assertEquals( "diff_main: Two insertions.", diffs, dmp.diff_main( "abc", "a123b456c", false ) ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "b"), Diff(DELETE, "456"), Diff(EQUAL, "c")); - assertEquals("diff_main: Two deletions.", diffs, dmp.diff_main("a123b456c", "abc", false)); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "123" ), Diff( EQUAL, "b" ), Diff( DELETE, "456" ), Diff( EQUAL, "c" ) ); + assertEquals( "diff_main: Two deletions.", diffs, dmp.diff_main( "a123b456c", "abc", false ) ); - // Perform a real diff. 
- // Switch off the timeout. - dmp.Diff_Timeout = 0; - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "b")); - assertEquals("diff_main: Simple case #1.", diffs, dmp.diff_main("a", "b", false)); + // Perform a real diff. + // Switch off the timeout. + dmp.Diff_Timeout = 0; + diffs = diffList( Diff( DELETE, "a" ), Diff( INSERT, "b" ) ); + assertEquals( "diff_main: Simple case #1.", diffs, dmp.diff_main( "a", "b", false ) ); - diffs = diffList(Diff(DELETE, "Apple"), Diff(INSERT, "Banana"), Diff(EQUAL, "s are a"), Diff(INSERT, "lso"), Diff(EQUAL, " fruit.")); - assertEquals("diff_main: Simple case #2.", diffs, dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", false)); + diffs = diffList( Diff( DELETE, "Apple" ), Diff( INSERT, "Banana" ), Diff( EQUAL, "s are a" ), Diff( INSERT, "lso" ), Diff( EQUAL, " fruit." ) ); + assertEquals( "diff_main: Simple case #2.", diffs, dmp.diff_main( "Apples are a fruit.", "Bananas are also fruit.", false ) ); - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\u0680", 1)), Diff(EQUAL, "x"), Diff(DELETE, "\t"), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\000", 1))); - assertEquals("diff_main: Simple case #3.", diffs, dmp.diff_main("ax\t", QString::fromWCharArray((const wchar_t*) L"\u0680x\000", 3), false)); + diffs = diffList( Diff( DELETE, "a" ), Diff( INSERT, QString::fromWCharArray( (const wchar_t *)L"\u0680", 1 ) ), Diff( EQUAL, "x" ), Diff( DELETE, "\t" ), Diff( INSERT, QString::fromWCharArray( (const wchar_t *)L"\000", 1 ) ) ); + assertEquals( "diff_main: Simple case #3.", diffs, dmp.diff_main( "ax\t", QString::fromWCharArray( (const wchar_t *)L"\u0680x\000", 3 ), false ) ); - diffs = diffList(Diff(DELETE, "1"), Diff(EQUAL, "a"), Diff(DELETE, "y"), Diff(EQUAL, "b"), Diff(DELETE, "2"), Diff(INSERT, "xab")); - assertEquals("diff_main: Overlap #1.", diffs, dmp.diff_main("1ayb2", "abxab", false)); + diffs = diffList( Diff( DELETE, "1" ), Diff( EQUAL, "a" ), Diff( 
DELETE, "y" ), Diff( EQUAL, "b" ), Diff( DELETE, "2" ), Diff( INSERT, "xab" ) ); + assertEquals( "diff_main: Overlap #1.", diffs, dmp.diff_main( "1ayb2", "abxab", false ) ); - diffs = diffList(Diff(INSERT, "xaxcx"), Diff(EQUAL, "abc"), Diff(DELETE, "y")); - assertEquals("diff_main: Overlap #2.", diffs, dmp.diff_main("abcy", "xaxcxabc", false)); + diffs = diffList( Diff( INSERT, "xaxcx" ), Diff( EQUAL, "abc" ), Diff( DELETE, "y" ) ); + assertEquals( "diff_main: Overlap #2.", diffs, dmp.diff_main( "abcy", "xaxcxabc", false ) ); - diffs = diffList(Diff(DELETE, "ABCD"), Diff(EQUAL, "a"), Diff(DELETE, "="), Diff(INSERT, "-"), Diff(EQUAL, "bcd"), Diff(DELETE, "="), Diff(INSERT, "-"), Diff(EQUAL, "efghijklmnopqrs"), Diff(DELETE, "EFGHIJKLMNOefg")); - assertEquals("diff_main: Overlap #3.", diffs, dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false)); + diffs = diffList( Diff( DELETE, "ABCD" ), Diff( EQUAL, "a" ), Diff( DELETE, "=" ), Diff( INSERT, "-" ), Diff( EQUAL, "bcd" ), Diff( DELETE, "=" ), Diff( INSERT, "-" ), Diff( EQUAL, "efghijklmnopqrs" ), Diff( DELETE, "EFGHIJKLMNOefg" ) ); + assertEquals( "diff_main: Overlap #3.", diffs, dmp.diff_main( "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false ) ); - diffs = diffList(Diff(INSERT, " "), Diff(EQUAL, "a"), Diff(INSERT, "nd"), Diff(EQUAL, " [[Pennsylvania]]"), Diff(DELETE, " and [[New")); - assertEquals("diff_main: Large equality.", diffs, dmp.diff_main("a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false)); + diffs = diffList( Diff( INSERT, " " ), Diff( EQUAL, "a" ), Diff( INSERT, "nd" ), Diff( EQUAL, " [[Pennsylvania]]" ), Diff( DELETE, " and [[New" ) ); + assertEquals( "diff_main: Large equality.", diffs, dmp.diff_main( "a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false ) ); - dmp.Diff_Timeout = 0.1f; // 100ms - // This test may 'fail' on extremely fast computers. If so, just increase the text lengths. 
- QString a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; - QString b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; - // Increase the text lengths by 1024 times to ensure a timeout. - for (int x = 0; x < 10; x++) { - a = a + a; - b = b + b; - } - clock_t startTime = clock(); - dmp.diff_main(a, b); - clock_t endTime = clock(); - // Test that we took at least the timeout period. - assertTrue("diff_main: Timeout min.", dmp.Diff_Timeout * CLOCKS_PER_SEC <= endTime - startTime); - // Test that we didn't take forever (be forgiving). - // Theoretically this test could fail very occasionally if the - // OS task swaps or locks up for a second at the wrong moment. - // Java seems to overrun by ~80% (compared with 10% for other languages). - // Therefore use an upper limit of 0.5s instead of 0.2s. - assertTrue("diff_main: Timeout max.", dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 > endTime - startTime); - dmp.Diff_Timeout = 0; - - // Test the linemode speedup. - // Must be long to pass the 100 char cutoff. 
- a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; - assertEquals("diff_main: Simple line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); - - a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; - b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; - assertEquals("diff_main: Single line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); - - a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; - QStringList texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); - QStringList texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); - assertEquals("diff_main: Overlap line-mode.", texts_textmode, texts_linemode); - - // Test null inputs. - try { - dmp.diff_main(NULL, NULL); - assertFalse("diff_main: Null inputs.", true); - } catch (const char* ex) { - // Exception expected. - } + dmp.Diff_Timeout = 0.1f; // 100ms + // This test may 'fail' on extremely fast computers. If so, just increase the text lengths. 
+ QString a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + QString b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for ( int x = 0; x < 10; x++ ) + { + a = a + a; + b = b + b; + } + clock_t startTime = clock(); + dmp.diff_main( a, b ); + clock_t endTime = clock(); + // Test that we took at least the timeout period. + assertTrue( "diff_main: Timeout min.", dmp.Diff_Timeout * CLOCKS_PER_SEC <= endTime - startTime ); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // Java seems to overrun by ~80% (compared with 10% for other languages). + // Therefore use an upper limit of 0.5s instead of 0.2s. + assertTrue( "diff_main: Timeout max.", dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 > endTime - startTime ); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + assertEquals( "diff_main: Simple line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); + + a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + assertEquals( "diff_main: Single line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); + + a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + QStringList texts_linemode = diff_rebuildtexts( dmp.diff_main( a, b, true ) ); + QStringList texts_textmode = diff_rebuildtexts( dmp.diff_main( a, b, false ) ); + assertEquals( "diff_main: Overlap line-mode.", texts_textmode, texts_linemode ); + + // Test null inputs. + try + { + dmp.diff_main( NULL, NULL ); + assertFalse( "diff_main: Null inputs.", true ); + } + catch ( const char *ex ) + { + // Exception expected. + } } - // MATCH TEST FUNCTIONS - -void diff_match_patch_test::testMatchAlphabet() { - // Initialise the bitmasks for Bitap. 
- QMap bitmask; - bitmask.insert('a', 4); - bitmask.insert('b', 2); - bitmask.insert('c', 1); - assertEquals("match_alphabet: Unique.", bitmask, dmp.match_alphabet("abc")); - - bitmask = QMap(); - bitmask.insert('a', 37); - bitmask.insert('b', 18); - bitmask.insert('c', 8); - assertEquals("match_alphabet: Duplicates.", bitmask, dmp.match_alphabet("abcaba")); +void diff_match_patch_test::testMatchAlphabet() +{ + // Initialise the bitmasks for Bitap. + QMap< QChar, int > bitmask; + bitmask.insert( 'a', 4 ); + bitmask.insert( 'b', 2 ); + bitmask.insert( 'c', 1 ); + assertEquals( "match_alphabet: Unique.", bitmask, dmp.match_alphabet( "abc" ) ); + + bitmask = QMap< QChar, int >(); + bitmask.insert( 'a', 37 ); + bitmask.insert( 'b', 18 ); + bitmask.insert( 'c', 8 ); + assertEquals( "match_alphabet: Duplicates.", bitmask, dmp.match_alphabet( "abcaba" ) ); } -void diff_match_patch_test::testMatchBitap() { - // Bitap algorithm. - dmp.Match_Distance = 100; - dmp.Match_Threshold = 0.5f; - assertEquals("match_bitap: Exact match #1.", 5, dmp.match_bitap("abcdefghijk", "fgh", 5)); +void diff_match_patch_test::testMatchBitap() +{ + // Bitap algorithm. 
+ dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + assertEquals( "match_bitap: Exact match #1.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 5 ) ); - assertEquals("match_bitap: Exact match #2.", 5, dmp.match_bitap("abcdefghijk", "fgh", 0)); + assertEquals( "match_bitap: Exact match #2.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 0 ) ); - assertEquals("match_bitap: Fuzzy match #1.", 4, dmp.match_bitap("abcdefghijk", "efxhi", 0)); + assertEquals( "match_bitap: Fuzzy match #1.", 4, dmp.match_bitap( "abcdefghijk", "efxhi", 0 ) ); - assertEquals("match_bitap: Fuzzy match #2.", 2, dmp.match_bitap("abcdefghijk", "cdefxyhijk", 5)); + assertEquals( "match_bitap: Fuzzy match #2.", 2, dmp.match_bitap( "abcdefghijk", "cdefxyhijk", 5 ) ); - assertEquals("match_bitap: Fuzzy match #3.", -1, dmp.match_bitap("abcdefghijk", "bxy", 1)); + assertEquals( "match_bitap: Fuzzy match #3.", -1, dmp.match_bitap( "abcdefghijk", "bxy", 1 ) ); - assertEquals("match_bitap: Overflow.", 2, dmp.match_bitap("123456789xx0", "3456789x0", 2)); + assertEquals( "match_bitap: Overflow.", 2, dmp.match_bitap( "123456789xx0", "3456789x0", 2 ) ); - assertEquals("match_bitap: Before start match.", 0, dmp.match_bitap("abcdef", "xxabc", 4)); + assertEquals( "match_bitap: Before start match.", 0, dmp.match_bitap( "abcdef", "xxabc", 4 ) ); - assertEquals("match_bitap: Beyond end match.", 3, dmp.match_bitap("abcdef", "defyy", 4)); + assertEquals( "match_bitap: Beyond end match.", 3, dmp.match_bitap( "abcdef", "defyy", 4 ) ); - assertEquals("match_bitap: Oversized pattern.", 0, dmp.match_bitap("abcdef", "xabcdefy", 0)); + assertEquals( "match_bitap: Oversized pattern.", 0, dmp.match_bitap( "abcdef", "xabcdefy", 0 ) ); - dmp.Match_Threshold = 0.4f; - assertEquals("match_bitap: Threshold #1.", 4, dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + dmp.Match_Threshold = 0.4f; + assertEquals( "match_bitap: Threshold #1.", 4, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); - dmp.Match_Threshold = 0.3f; - 
assertEquals("match_bitap: Threshold #2.", -1, dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + dmp.Match_Threshold = 0.3f; + assertEquals( "match_bitap: Threshold #2.", -1, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); - dmp.Match_Threshold = 0.0f; - assertEquals("match_bitap: Threshold #3.", 1, dmp.match_bitap("abcdefghijk", "bcdef", 1)); + dmp.Match_Threshold = 0.0f; + assertEquals( "match_bitap: Threshold #3.", 1, dmp.match_bitap( "abcdefghijk", "bcdef", 1 ) ); - dmp.Match_Threshold = 0.5f; - assertEquals("match_bitap: Multiple select #1.", 0, dmp.match_bitap("abcdexyzabcde", "abccde", 3)); + dmp.Match_Threshold = 0.5f; + assertEquals( "match_bitap: Multiple select #1.", 0, dmp.match_bitap( "abcdexyzabcde", "abccde", 3 ) ); - assertEquals("match_bitap: Multiple select #2.", 8, dmp.match_bitap("abcdexyzabcde", "abccde", 5)); + assertEquals( "match_bitap: Multiple select #2.", 8, dmp.match_bitap( "abcdexyzabcde", "abccde", 5 ) ); - dmp.Match_Distance = 10; // Strict location. - assertEquals("match_bitap: Distance test #1.", -1, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); + dmp.Match_Distance = 10; // Strict location. + assertEquals( "match_bitap: Distance test #1.", -1, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); - assertEquals("match_bitap: Distance test #2.", 0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1)); + assertEquals( "match_bitap: Distance test #2.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1 ) ); - dmp.Match_Distance = 1000; // Loose location. - assertEquals("match_bitap: Distance test #3.", 0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); + dmp.Match_Distance = 1000; // Loose location. + assertEquals( "match_bitap: Distance test #3.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); } -void diff_match_patch_test::testMatchMain() { - // Full match. 
- assertEquals("match_main: Equality.", 0, dmp.match_main("abcdef", "abcdef", 1000)); +void diff_match_patch_test::testMatchMain() +{ + // Full match. + assertEquals( "match_main: Equality.", 0, dmp.match_main( "abcdef", "abcdef", 1000 ) ); - assertEquals("match_main: Null text.", -1, dmp.match_main("", "abcdef", 1)); + assertEquals( "match_main: Null text.", -1, dmp.match_main( "", "abcdef", 1 ) ); - assertEquals("match_main: Null pattern.", 3, dmp.match_main("abcdef", "", 3)); + assertEquals( "match_main: Null pattern.", 3, dmp.match_main( "abcdef", "", 3 ) ); - assertEquals("match_main: Exact match.", 3, dmp.match_main("abcdef", "de", 3)); + assertEquals( "match_main: Exact match.", 3, dmp.match_main( "abcdef", "de", 3 ) ); - dmp.Match_Threshold = 0.7f; - assertEquals("match_main: Complex match.", 4, dmp.match_main("I am the very model of a modern major general.", " that berry ", 5)); - dmp.Match_Threshold = 0.5f; + dmp.Match_Threshold = 0.7f; + assertEquals( "match_main: Complex match.", 4, dmp.match_main( "I am the very model of a modern major general.", " that berry ", 5 ) ); + dmp.Match_Threshold = 0.5f; - // Test null inputs. - try { - dmp.match_main(NULL, NULL, 0); - assertFalse("match_main: Null inputs.", true); - } catch (const char* ex) { - // Exception expected. - } + // Test null inputs. + try + { + dmp.match_main( NULL, NULL, 0 ); + assertFalse( "match_main: Null inputs.", true ); + } + catch ( const char *ex ) + { + // Exception expected. + } } - // PATCH TEST FUNCTIONS - -void diff_match_patch_test::testPatchObj() { - // Patch Object. 
- Patch p; - p.start1 = 20; - p.start2 = 21; - p.length1 = 18; - p.length2 = 17; - p.diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, "\nlaz")); - QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals("Patch: toString.", strp, p.toString()); +void diff_match_patch_test::testPatchObj() +{ + // Patch Object. + Patch p; + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = diffList( Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, "\nlaz" ) ); + QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals( "Patch: toString.", strp, p.toString() ); } -void diff_match_patch_test::testPatchFromText() { - assertTrue("patch_fromText: #0.", dmp.patch_fromText("").isEmpty()); +void diff_match_patch_test::testPatchFromText() +{ + assertTrue( "patch_fromText: #0.", dmp.patch_fromText( "" ).isEmpty() ); - QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals("patch_fromText: #1.", strp, dmp.patch_fromText(strp).value(0).toString()); + QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals( "patch_fromText: #1.", strp, dmp.patch_fromText( strp ).value( 0 ).toString() ); - assertEquals("patch_fromText: #2.", "@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText("@@ -1 +1 @@\n-a\n+b\n").value(0).toString()); + assertEquals( "patch_fromText: #2.", "@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText( "@@ -1 +1 @@\n-a\n+b\n" ).value( 0 ).toString() ); - assertEquals("patch_fromText: #3.", "@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText("@@ -1,3 +0,0 @@\n-abc\n").value(0).toString()); + assertEquals( "patch_fromText: #3.", "@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText( "@@ -1,3 +0,0 @@\n-abc\n" ).value( 0 
).toString() ); - assertEquals("patch_fromText: #4.", "@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText("@@ -0,0 +1,3 @@\n+abc\n").value(0).toString()); + assertEquals( "patch_fromText: #4.", "@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText( "@@ -0,0 +1,3 @@\n+abc\n" ).value( 0 ).toString() ); - // Generates error. - try { - dmp.patch_fromText("Bad\nPatch\n"); - assertFalse("patch_fromText: #5.", true); - } catch (QString ex) { - // Exception expected. - } + // Generates error. + try + { + dmp.patch_fromText( "Bad\nPatch\n" ); + assertFalse( "patch_fromText: #5.", true ); + } + catch ( QString ex ) + { + // Exception expected. + } } -void diff_match_patch_test::testPatchToText() { - QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - QList patches; - patches = dmp.patch_fromText(strp); - assertEquals("patch_toText: Single", strp, dmp.patch_toText(patches)); +void diff_match_patch_test::testPatchToText() +{ + QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + QList< Patch > patches; + patches = dmp.patch_fromText( strp ); + assertEquals( "patch_toText: Single", strp, dmp.patch_toText( patches ) ); - strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; - patches = dmp.patch_fromText(strp); - assertEquals("patch_toText: Dual", strp, dmp.patch_toText(patches)); + strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; + patches = dmp.patch_fromText( strp ); + assertEquals( "patch_toText: Dual", strp, dmp.patch_toText( patches ) ); } -void diff_match_patch_test::testPatchAddContext() { - dmp.Patch_Margin = 4; - Patch p; - p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog."); - assertEquals("patch_addContext: Simple case.", "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString()); - - p = dmp.patch_fromText("@@ -21,4 +21,10 
@@\n-jump\n+somersault\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps."); - assertEquals("patch_addContext: Not enough trailing context.", "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString()); - - p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps."); - assertEquals("patch_addContext: Not enough leading context.", "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString()); - - p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps. The quick brown fox crashes."); - assertEquals("patch_addContext: Ambiguity.", "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", p.toString()); +void diff_match_patch_test::testPatchAddContext() +{ + dmp.Patch_Margin = 4; + Patch p; + p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" ).value( 0 ); + dmp.patch_addContext( p, "The quick brown fox jumps over the lazy dog." ); + assertEquals( "patch_addContext: Simple case.", "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString() ); + + p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" ).value( 0 ); + dmp.patch_addContext( p, "The quick brown fox jumps." ); + assertEquals( "patch_addContext: Not enough trailing context.", "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString() ); + + p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" ).value( 0 ); + dmp.patch_addContext( p, "The quick brown fox jumps." ); + assertEquals( "patch_addContext: Not enough leading context.", "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString() ); + + p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" ).value( 0 ); + dmp.patch_addContext( p, "The quick brown fox jumps. The quick brown fox crashes." ); + assertEquals( "patch_addContext: Ambiguity.", "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", p.toString() ); } -void diff_match_patch_test::testPatchMake() { - QList patches; - patches = dmp.patch_make("", ""); - assertEquals("patch_make: Null case", "", dmp.patch_toText(patches)); - - QString text1 = "The quick brown fox jumps over the lazy dog."; - QString text2 = "That quick brown fox jumped over a lazy dog."; - QString expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; - // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. - patches = dmp.patch_make(text2, text1); - assertEquals("patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText(patches)); +void diff_match_patch_test::testPatchMake() +{ + QList< Patch > patches; + patches = dmp.patch_make( "", "" ); + assertEquals( "patch_make: Null case", "", dmp.patch_toText( patches ) ); - expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - patches = dmp.patch_make(text1, text2); - assertEquals("patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText(patches)); + QString text1 = "The quick brown fox jumps over the lazy dog."; + QString text2 = "That quick brown fox jumped over a lazy dog."; + QString expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. 
+ patches = dmp.patch_make( text2, text1 ); + assertEquals( "patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText( patches ) ); - QList diffs = dmp.diff_main(text1, text2, false); - patches = dmp.patch_make(diffs); - assertEquals("patch_make: Diff input", expectedPatch, dmp.patch_toText(patches)); + expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make( text1, text2 ); + assertEquals( "patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText( patches ) ); - patches = dmp.patch_make(text1, diffs); - assertEquals("patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText(patches)); + QList< Diff > diffs = dmp.diff_main( text1, text2, false ); + patches = dmp.patch_make( diffs ); + assertEquals( "patch_make: Diff input", expectedPatch, dmp.patch_toText( patches ) ); - patches = dmp.patch_make(text1, text2, diffs); - assertEquals("patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText(patches)); + patches = dmp.patch_make( text1, diffs ); + assertEquals( "patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText( patches ) ); - patches = dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?"); - assertEquals("patch_toText: Character encoding.", "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText(patches)); + patches = dmp.patch_make( text1, text2, diffs ); + assertEquals( "patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText( patches ) ); - diffs = diffList(Diff(DELETE, "`1234567890-=[]\\;',./"), Diff(INSERT, "~!@#$%^&*()_+{}|:\"<>?")); - assertEquals("patch_fromText: Character decoding.", diffs, dmp.patch_fromText("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n").value(0).diffs); + patches = dmp.patch_make( "`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?" 
); + assertEquals( "patch_toText: Character encoding.", "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText( patches ) ); - text1 = ""; - for (int x = 0; x < 100; x++) { - text1 += "abcdef"; - } - text2 = text1 + "123"; - expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; - patches = dmp.patch_make(text1, text2); - assertEquals("patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText(patches)); + diffs = diffList( Diff( DELETE, "`1234567890-=[]\\;',./" ), Diff( INSERT, "~!@#$%^&*()_+{}|:\"<>?" ) ); + assertEquals( "patch_fromText: Character decoding.", diffs, dmp.patch_fromText( "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" ).value( 0 ).diffs ); - // Test null inputs. - try { - dmp.patch_make(NULL, NULL); - assertFalse("patch_make: Null inputs.", true); - } catch (const char* ex) { - // Exception expected. - } + text1 = ""; + for ( int x = 0; x < 100; x++ ) + { + text1 += "abcdef"; + } + text2 = text1 + "123"; + expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make( text1, text2 ); + assertEquals( "patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText( patches ) ); + + // Test null inputs. + try + { + dmp.patch_make( NULL, NULL ); + assertFalse( "patch_make: Null inputs.", true ); + } + catch ( const char *ex ) + { + // Exception expected. + } } -void diff_match_patch_test::testPatchSplitMax() { - // Assumes that Match_MaxBits is 32. 
- QList patches; - patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #1.", "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText(patches)); - - patches = dmp.patch_make("abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz"); - QString oldToText = dmp.patch_toText(patches); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #2.", oldToText, dmp.patch_toText(patches)); - - patches = dmp.patch_make("1234567890123456789012345678901234567890123456789012345678901234567890", "abc"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #3.", "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText(patches)); - - patches = dmp.patch_make("abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #4.", "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText(patches)); +void diff_match_patch_test::testPatchSplitMax() +{ + // Assumes that Match_MaxBits is 32. 
+ QList< Patch > patches; + patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0" ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #1.", "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz" ); + QString oldToText = dmp.patch_toText( patches ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #2.", oldToText, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "1234567890123456789012345678901234567890123456789012345678901234567890", "abc" ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #3.", "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1" ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #4.", "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText( patches ) ); } -void diff_match_patch_test::testPatchAddPadding() { - QList patches; - patches = dmp.patch_make("", "test"); - assertEquals("patch_addPadding: Both edges full.", "@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges full.", "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText(patches)); - - patches 
= dmp.patch_make("XY", "XtestY"); - assertEquals("patch_addPadding: Both edges partial.", "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges partial.", "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText(patches)); - - patches = dmp.patch_make("XXXXYYYY", "XXXXtestYYYY"); - assertEquals("patch_addPadding: Both edges none.", "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges none.", "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); +void diff_match_patch_test::testPatchAddPadding() +{ + QList< Patch > patches; + patches = dmp.patch_make( "", "test" ); + assertEquals( "patch_addPadding: Both edges full.", "@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges full.", "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "XY", "XtestY" ); + assertEquals( "patch_addPadding: Both edges partial.", "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges partial.", "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "XXXXYYYY", "XXXXtestYYYY" ); + assertEquals( "patch_addPadding: Both edges none.", "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges none.", "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); } -void diff_match_patch_test::testPatchApply() { - dmp.Match_Distance = 1000; - dmp.Match_Threshold = 0.5f; - dmp.Patch_DeleteThreshold = 0.5f; - QList patches; - patches = dmp.patch_make("", ""); - QPair > results = 
dmp.patch_apply(patches, "Hello world."); - QVector boolArray = results.second; - - QString resultStr = QString("%1\t%2").arg(results.first).arg(boolArray.count()); - assertEquals("patch_apply: Null case.", "Hello world.\t0", resultStr); - - patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog."); - results = dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Exact match.", "That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr); - - results = dmp.patch_apply(patches, "The quick red rabbit jumps over the tired tiger."); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Partial match.", "That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr); - - results = dmp.patch_apply(patches, "I am the very model of a modern major general."); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Failed match.", "I am the very model of a modern major general.\tfalse\tfalse", resultStr); - - patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); - results = dmp.patch_apply(patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? 
"true" : "false"); - assertEquals("patch_apply: Big delete, small change.", "xabcy\ttrue\ttrue", resultStr); - - patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); - results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Big delete, large change 1.", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr); - - dmp.Patch_DeleteThreshold = 0.6f; - patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); - results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Big delete, large change 2.", "xabcy\ttrue\ttrue", resultStr); - dmp.Patch_DeleteThreshold = 0.5f; - - dmp.Match_Threshold = 0.0f; - dmp.Match_Distance = 0; - patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890"); - results = dmp.patch_apply(patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? 
"true" : "false"); - assertEquals("patch_apply: Compensate for failed patch.", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr); - dmp.Match_Threshold = 0.5f; - dmp.Match_Distance = 1000; - - patches = dmp.patch_make("", "test"); - QString patchStr = dmp.patch_toText(patches); - dmp.patch_apply(patches, ""); - assertEquals("patch_apply: No side effects.", patchStr, dmp.patch_toText(patches)); - - patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof"); - patchStr = dmp.patch_toText(patches); - dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); - assertEquals("patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText(patches)); - - patches = dmp.patch_make("", "test"); - results = dmp.patch_apply(patches, ""); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false"); - assertEquals("patch_apply: Edge exact match.", "test\ttrue", resultStr); - - patches = dmp.patch_make("XY", "XtestY"); - results = dmp.patch_apply(patches, "XY"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false"); - assertEquals("patch_apply: Near edge exact match.", "XtestY\ttrue", resultStr); - - patches = dmp.patch_make("y", "y123"); - results = dmp.patch_apply(patches, "x"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false"); - assertEquals("patch_apply: Edge partial match.", "x123\ttrue", resultStr); +void diff_match_patch_test::testPatchApply() +{ + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + QList< Patch > patches; + patches = dmp.patch_make( "", "" ); + QPair< QString, QVector< bool > > results = dmp.patch_apply( patches, "Hello world." 
); + QVector< bool > boolArray = results.second; + + QString resultStr = QString( "%1\t%2" ).arg( results.first ).arg( boolArray.count() ); + assertEquals( "patch_apply: Null case.", "Hello world.\t0", resultStr ); + + patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog." ); + results = dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? "true" : "false" ); + assertEquals( "patch_apply: Exact match.", "That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr ); + + results = dmp.patch_apply( patches, "The quick red rabbit jumps over the tired tiger." ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? "true" : "false" ); + assertEquals( "patch_apply: Partial match.", "That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr ); + + results = dmp.patch_apply( patches, "I am the very model of a modern major general." ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? "true" : "false" ); + assertEquals( "patch_apply: Failed match.", "I am the very model of a modern major general.\tfalse\tfalse", resultStr ); + + patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); + results = dmp.patch_apply( patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? 
"true" : "false" ); + assertEquals( "patch_apply: Big delete, small change.", "xabcy\ttrue\ttrue", resultStr ); + + patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); + results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? "true" : "false" ); + assertEquals( "patch_apply: Big delete, large change 1.", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr ); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); + results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? "true" : "false" ); + assertEquals( "patch_apply: Big delete, large change 2.", "xabcy\ttrue\ttrue", resultStr ); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890" ); + results = dmp.patch_apply( patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? 
"true" : "false" ); + assertEquals( "patch_apply: Compensate for failed patch.", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr ); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make( "", "test" ); + QString patchStr = dmp.patch_toText( patches ); + dmp.patch_apply( patches, "" ); + assertEquals( "patch_apply: No side effects.", patchStr, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "Woof" ); + patchStr = dmp.patch_toText( patches ); + dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); + assertEquals( "patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "", "test" ); + results = dmp.patch_apply( patches, "" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ); + assertEquals( "patch_apply: Edge exact match.", "test\ttrue", resultStr ); + + patches = dmp.patch_make( "XY", "XtestY" ); + results = dmp.patch_apply( patches, "XY" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ); + assertEquals( "patch_apply: Near edge exact match.", "XtestY\ttrue", resultStr ); + + patches = dmp.patch_make( "y", "y123" ); + results = dmp.patch_apply( patches, "x" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? 
"true" : "false" ); + assertEquals( "patch_apply: Edge partial match.", "x123\ttrue", resultStr ); } - -void diff_match_patch_test::assertEquals(const QString &strCase, int n1, int n2) { - if (n1 != n2) { - qDebug("%s FAIL\nExpected: %d\nActual: %d", qPrintable(strCase), n1, n2); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals( const QString &strCase, int n1, int n2 ) +{ + if ( n1 != n2 ) + { + qDebug( "%s FAIL\nExpected: %d\nActual: %d", qPrintable( strCase ), n1, n2 ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QString &s1, const QString &s2) { - if (s1 != s2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", - qPrintable(strCase), qPrintable(s1), qPrintable(s2)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals( const QString &strCase, const QString &s1, const QString &s2 ) +{ + if ( s1 != s2 ) + { + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( s1 ), qPrintable( s2 ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const Diff &d1, const Diff &d2) { - if (d1 != d2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), - qPrintable(d1.toString()), qPrintable(d2.toString())); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals( const QString &strCase, const Diff &d1, const Diff &d2 ) +{ + if ( d1 != d2 ) + { + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( d1.toString() ), qPrintable( d2.toString() ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QList &list1, const QList &list2) { - bool fail = false; - if (list1.count() == list2.count()) { - int i = 0; - 
foreach(Diff d1, list1) { - Diff d2 = list2.value(i); - if (d1 != d2) { +void diff_match_patch_test::assertEquals( const QString &strCase, const QList< Diff > &list1, const QList< Diff > &list2 ) +{ + bool fail = false; + if ( list1.count() == list2.count() ) + { + int i = 0; + foreach( Diff d1, list1 ) + { + Diff d2 = list2.value( i ); + if ( d1 != d2 ) + { + fail = true; + break; + } + i++; + } + } + else + { fail = true; - break; - } - i++; } - } else { - fail = true; - } - if (fail) { - // Build human readable description of both lists. - QString listString1 = "("; - bool first = true; - foreach(Diff d1, list1) { - if (!first) { - listString1 += ", "; - } - listString1 += d1.toString(); - first = false; - } - listString1 += ")"; - QString listString2 = "("; - first = true; - foreach(Diff d2, list2) { - if (!first) { - listString2 += ", "; - } - listString2 += d2.toString(); - first = false; + if ( fail ) + { + // Build human readable description of both lists. + QString listString1 = "("; + bool first = true; + foreach( Diff d1, list1 ) + { + if ( !first ) + { + listString1 += ", "; + } + listString1 += d1.toString(); + first = false; + } + listString1 += ")"; + QString listString2 = "("; + first = true; + foreach( Diff d2, list2 ) + { + if ( !first ) + { + listString2 += ", "; + } + listString2 += d2.toString(); + first = false; + } + listString2 += ")"; + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( listString1 ), qPrintable( listString2 ) ); + throw strCase; } - listString2 += ")"; - qDebug("%s FAIL\nExpected: %s\nActual: %s", - qPrintable(strCase), qPrintable(listString1), qPrintable(listString2)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QList &list1, const QList &list2) { - bool fail = false; - if (list1.count() == list2.count()) { - int i = 0; - foreach(QVariant q1, list1) { - 
QVariant q2 = list2.value(i); - if (q1 != q2) { +void diff_match_patch_test::assertEquals( const QString &strCase, const QList< QVariant > &list1, const QList< QVariant > &list2 ) +{ + bool fail = false; + if ( list1.count() == list2.count() ) + { + int i = 0; + foreach( QVariant q1, list1 ) + { + QVariant q2 = list2.value( i ); + if ( q1 != q2 ) + { + fail = true; + break; + } + i++; + } + } + else + { fail = true; - break; - } - i++; } - } else { - fail = true; - } - if (fail) { - // Build human readable description of both lists. - QString listString1 = "("; - bool first = true; - foreach(QVariant q1, list1) { - if (!first) { - listString1 += ", "; - } - listString1 += q1.toString(); - first = false; - } - listString1 += ")"; - QString listString2 = "("; - first = true; - foreach(QVariant q2, list2) { - if (!first) { - listString2 += ", "; - } - listString2 += q2.toString(); - first = false; + if ( fail ) + { + // Build human readable description of both lists. + QString listString1 = "("; + bool first = true; + foreach( QVariant q1, list1 ) + { + if ( !first ) + { + listString1 += ", "; + } + listString1 += q1.toString(); + first = false; + } + listString1 += ")"; + QString listString2 = "("; + first = true; + foreach( QVariant q2, list2 ) + { + if ( !first ) + { + listString2 += ", "; + } + listString2 += q2.toString(); + first = false; + } + listString2 += ")"; + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( listString1 ), qPrintable( listString2 ) ); + throw strCase; } - listString2 += ")"; - qDebug("%s FAIL\nExpected: %s\nActual: %s", - qPrintable(strCase), qPrintable(listString1), qPrintable(listString2)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QVariant &var1, const QVariant &var2) { - if (var1 != var2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), - 
qPrintable(var1.toString()), qPrintable(var2.toString())); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals( const QString &strCase, const QVariant &var1, const QVariant &var2 ) +{ + if ( var1 != var2 ) + { + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( var1.toString() ), qPrintable( var2.toString() ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QMap &m1, const QMap &m2) { - QMapIterator i1(m1), i2(m2); - - while (i1.hasNext() && i2.hasNext()) { - i1.next(); - i2.next(); - if (i1.key() != i2.key() || i1.value() != i2.value()) { - qDebug("%s FAIL\nExpected: (%c, %d)\nActual: (%c, %d)", qPrintable(strCase), - i1.key().toAscii(), i1.value(), i2.key().toAscii(), i2.value()); - throw strCase; +void diff_match_patch_test::assertEquals( const QString &strCase, const QMap< QChar, int > &m1, const QMap< QChar, int > &m2 ) +{ + QMapIterator< QChar, int > i1( m1 ), i2( m2 ); + + while ( i1.hasNext() && i2.hasNext() ) + { + i1.next(); + i2.next(); + if ( i1.key() != i2.key() || i1.value() != i2.value() ) + { + qDebug( "%s FAIL\nExpected: (%c, %d)\nActual: (%c, %d)", qPrintable( strCase ), i1.key().toAscii(), i1.value(), i2.key().toAscii(), i2.value() ); + throw strCase; + } } - } - if (i1.hasNext()) { - i1.next(); - qDebug("%s FAIL\nExpected: (%c, %d)\nActual: none", - qPrintable(strCase), i1.key().toAscii(), i1.value()); - throw strCase; - } - if (i2.hasNext()) { - i2.next(); - qDebug("%s FAIL\nExpected: none\nActual: (%c, %d)", - qPrintable(strCase), i2.key().toAscii(), i2.value()); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); + if ( i1.hasNext() ) + { + i1.next(); + qDebug( "%s FAIL\nExpected: (%c, %d)\nActual: none", qPrintable( strCase ), i1.key().toAscii(), i1.value() ); + throw strCase; + } + if ( i2.hasNext() ) + { + i2.next(); + qDebug( "%s FAIL\nExpected: none\nActual: 
(%c, %d)", qPrintable( strCase ), i2.key().toAscii(), i2.value() ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QStringList &list1, const QStringList &list2) { - if (list1 != list2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), - qPrintable(list1.join(",")), qPrintable(list2.join(","))); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals( const QString &strCase, const QStringList &list1, const QStringList &list2 ) +{ + if ( list1 != list2 ) + { + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( list1.join( "," ) ), qPrintable( list2.join( "," ) ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertTrue(const QString &strCase, bool value) { - if (!value) { - qDebug("%s FAIL\nExpected: true\nActual: false", qPrintable(strCase)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertTrue( const QString &strCase, bool value ) +{ + if ( !value ) + { + qDebug( "%s FAIL\nExpected: true\nActual: false", qPrintable( strCase ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertFalse(const QString &strCase, bool value) { - if (value) { - qDebug("%s FAIL\nExpected: false\nActual: true", qPrintable(strCase)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertFalse( const QString &strCase, bool value ) +{ + if ( value ) + { + qDebug( "%s FAIL\nExpected: false\nActual: true", qPrintable( strCase ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } - // Construct the two texts which made up the diff originally. 
-QStringList diff_match_patch_test::diff_rebuildtexts(QList diffs) { - QStringList text; - text << QString("") << QString(""); - foreach (Diff myDiff, diffs) { - if (myDiff.operation != INSERT) { - text[0] += myDiff.text; +QStringList diff_match_patch_test::diff_rebuildtexts( QList< Diff > diffs ) +{ + QStringList text; + text << QString( "" ) << QString( "" ); + foreach( Diff myDiff, diffs ) + { + if ( myDiff.operation != INSERT ) + { + text[ 0 ] += myDiff.text; + } + if ( myDiff.operation != DELETE ) + { + text[ 1 ] += myDiff.text; + } } - if (myDiff.operation != DELETE) { - text[1] += myDiff.text; - } - } - return text; + return text; } -void diff_match_patch_test::assertEmpty(const QString &strCase, const QStringList &list) { - if (!list.isEmpty()) { - throw strCase; - } +void diff_match_patch_test::assertEmpty( const QString &strCase, const QStringList &list ) +{ + if ( !list.isEmpty() ) + { + throw strCase; + } } - // Private function for quickly building lists of diffs. -QList diff_match_patch_test::diffList(Diff d1, Diff d2, Diff d3, Diff d4, Diff d5, - Diff d6, Diff d7, Diff d8, Diff d9, Diff d10) { - // Diff(INSERT, NULL) is invalid and thus is used as the default argument. - QList listRet; - if (d1.operation == INSERT && d1.text == NULL) { - return listRet; - } - listRet << d1; +QList< Diff > diff_match_patch_test::diffList( Diff d1, Diff d2, Diff d3, Diff d4, Diff d5, Diff d6, Diff d7, Diff d8, Diff d9, Diff d10 ) +{ + // Diff(INSERT, NULL) is invalid and thus is used as the default argument. 
+ QList< Diff > listRet; + if ( d1.operation == INSERT && d1.text == NULL ) + { + return listRet; + } + listRet << d1; - if (d2.operation == INSERT && d2.text == NULL) { - return listRet; - } - listRet << d2; + if ( d2.operation == INSERT && d2.text == NULL ) + { + return listRet; + } + listRet << d2; - if (d3.operation == INSERT && d3.text == NULL) { - return listRet; - } - listRet << d3; + if ( d3.operation == INSERT && d3.text == NULL ) + { + return listRet; + } + listRet << d3; - if (d4.operation == INSERT && d4.text == NULL) { - return listRet; - } - listRet << d4; + if ( d4.operation == INSERT && d4.text == NULL ) + { + return listRet; + } + listRet << d4; - if (d5.operation == INSERT && d5.text == NULL) { - return listRet; - } - listRet << d5; + if ( d5.operation == INSERT && d5.text == NULL ) + { + return listRet; + } + listRet << d5; - if (d6.operation == INSERT && d6.text == NULL) { - return listRet; - } - listRet << d6; + if ( d6.operation == INSERT && d6.text == NULL ) + { + return listRet; + } + listRet << d6; - if (d7.operation == INSERT && d7.text == NULL) { - return listRet; - } - listRet << d7; + if ( d7.operation == INSERT && d7.text == NULL ) + { + return listRet; + } + listRet << d7; - if (d8.operation == INSERT && d8.text == NULL) { - return listRet; - } - listRet << d8; + if ( d8.operation == INSERT && d8.text == NULL ) + { + return listRet; + } + listRet << d8; - if (d9.operation == INSERT && d9.text == NULL) { - return listRet; - } - listRet << d9; + if ( d9.operation == INSERT && d9.text == NULL ) + { + return listRet; + } + listRet << d9; - if (d10.operation == INSERT && d10.text == NULL) { - return listRet; - } - listRet << d10; + if ( d10.operation == INSERT && d10.text == NULL ) + { + return listRet; + } + listRet << d10; - return listRet; + return listRet; } - /* Compile instructions for MinGW and QT4 on Windows: qmake -project diff --git a/cpp/diff_match_patch_test.h b/cpp/diff_match_patch_test.h index 97922229..e3aa6d1e 100644 --- 
a/cpp/diff_match_patch_test.h +++ b/cpp/diff_match_patch_test.h @@ -19,71 +19,68 @@ #ifndef DIFF_MATCH_PATCH_TEST_H #define DIFF_MATCH_PATCH_TEST_H -class diff_match_patch_test { - public: - diff_match_patch_test(); - void run_all_tests(); +class diff_match_patch_test +{ +public: + diff_match_patch_test(); + void run_all_tests(); - // DIFF TEST FUNCTIONS - void testDiffCommonPrefix(); - void testDiffCommonSuffix(); - void testDiffCommonOverlap(); - void testDiffHalfmatch(); - void testDiffLinesToChars(); - void testDiffCharsToLines(); - void testDiffCleanupMerge(); - void testDiffCleanupSemanticLossless(); - void testDiffCleanupSemantic(); - void testDiffCleanupEfficiency(); - void testDiffPrettyHtml(); - void testDiffText(); - void testDiffDelta(); - void testDiffXIndex(); - void testDiffLevenshtein(); - void testDiffBisect(); - void testDiffMain(); + // DIFF TEST FUNCTIONS + void testDiffCommonPrefix(); + void testDiffCommonSuffix(); + void testDiffCommonOverlap(); + void testDiffHalfmatch(); + void testDiffLinesToChars(); + void testDiffCharsToLines(); + void testDiffCleanupMerge(); + void testDiffCleanupSemanticLossless(); + void testDiffCleanupSemantic(); + void testDiffCleanupEfficiency(); + void testDiffPrettyHtml(); + void testDiffText(); + void testDiffDelta(); + void testDiffXIndex(); + void testDiffLevenshtein(); + void testDiffBisect(); + void testDiffMain(); - // MATCH TEST FUNCTIONS - void testMatchAlphabet(); - void testMatchBitap(); - void testMatchMain(); + // MATCH TEST FUNCTIONS + void testMatchAlphabet(); + void testMatchBitap(); + void testMatchMain(); - // PATCH TEST FUNCTIONS - void testPatchObj(); - void testPatchFromText(); - void testPatchToText(); - void testPatchAddContext(); - void testPatchMake(); - void testPatchSplitMax(); - void testPatchAddPadding(); - void testPatchApply(); + // PATCH TEST FUNCTIONS + void testPatchObj(); + void testPatchFromText(); + void testPatchToText(); + void testPatchAddContext(); + void testPatchMake(); + 
void testPatchSplitMax(); + void testPatchAddPadding(); + void testPatchApply(); - private: - diff_match_patch dmp; +private: + diff_match_patch dmp; - // Define equality. - void assertEquals(const QString &strCase, int n1, int n2); - void assertEquals(const QString &strCase, const QString &s1, const QString &s2); - void assertEquals(const QString &strCase, const Diff &d1, const Diff &d2); - void assertEquals(const QString &strCase, const QList &list1, const QList &list2); - void assertEquals(const QString &strCase, const QList &list1, const QList &list2); - void assertEquals(const QString &strCase, const QVariant &var1, const QVariant &var2); - void assertEquals(const QString &strCase, const QMap &m1, const QMap &m2); - void assertEquals(const QString &strCase, const QStringList &list1, const QStringList &list2); - void assertTrue(const QString &strCase, bool value); - void assertFalse(const QString &strCase, bool value); - void assertEmpty(const QString &strCase, const QStringList &list); + // Define equality. 
+ void assertEquals( const QString &strCase, int n1, int n2 ); + void assertEquals( const QString &strCase, const QString &s1, const QString &s2 ); + void assertEquals( const QString &strCase, const Diff &d1, const Diff &d2 ); + void assertEquals( const QString &strCase, const QList< Diff > &list1, const QList< Diff > &list2 ); + void assertEquals( const QString &strCase, const QList< QVariant > &list1, const QList< QVariant > &list2 ); + void assertEquals( const QString &strCase, const QVariant &var1, const QVariant &var2 ); + void assertEquals( const QString &strCase, const QMap< QChar, int > &m1, const QMap< QChar, int > &m2 ); + void assertEquals( const QString &strCase, const QStringList &list1, const QStringList &list2 ); + void assertTrue( const QString &strCase, bool value ); + void assertFalse( const QString &strCase, bool value ); + void assertEmpty( const QString &strCase, const QStringList &list ); - // Construct the two texts which made up the diff originally. - QStringList diff_rebuildtexts(QList diffs); - // Private function for quickly building lists of diffs. - QList diffList( - // Diff(INSERT, NULL) is invalid and thus is used as the default argument. - Diff d1 = Diff(INSERT, NULL), Diff d2 = Diff(INSERT, NULL), - Diff d3 = Diff(INSERT, NULL), Diff d4 = Diff(INSERT, NULL), - Diff d5 = Diff(INSERT, NULL), Diff d6 = Diff(INSERT, NULL), - Diff d7 = Diff(INSERT, NULL), Diff d8 = Diff(INSERT, NULL), - Diff d9 = Diff(INSERT, NULL), Diff d10 = Diff(INSERT, NULL)); + // Construct the two texts which made up the diff originally. + QStringList diff_rebuildtexts( QList< Diff > diffs ); + // Private function for quickly building lists of diffs. + QList< Diff > diffList( + // Diff(INSERT, NULL) is invalid and thus is used as the default argument. 
+ Diff d1 = Diff( INSERT, NULL ), Diff d2 = Diff( INSERT, NULL ), Diff d3 = Diff( INSERT, NULL ), Diff d4 = Diff( INSERT, NULL ), Diff d5 = Diff( INSERT, NULL ), Diff d6 = Diff( INSERT, NULL ), Diff d7 = Diff( INSERT, NULL ), Diff d8 = Diff( INSERT, NULL ), Diff d9 = Diff( INSERT, NULL ), Diff d10 = Diff( INSERT, NULL ) ); }; -#endif // DIFF_MATCH_PATCH_TEST_H +#endif // DIFF_MATCH_PATCH_TEST_H diff --git a/cpp17/CMakeLists.txt b/cpp17/CMakeLists.txt new file mode 100644 index 00000000..c972d757 --- /dev/null +++ b/cpp17/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 3.22) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED YES) + + +SET( _PROJECT_NAME diff_match_patch_cpp17 ) +project( ${_PROJECT_NAME} ) +add_library(${_PROJECT_NAME} + STATIC + diff_match_patch.cpp + diff_match_patch.h + diff_match_patch_utils.cpp + diff_match_patch_utils.h +) + +target_include_directories( ${_PROJECT_NAME} PUBLIC ${CMAKE_SOURCE_DIR} ) +target_link_libraries( ${_PROJECT_NAME} ) + +SET( TEST_NAME "${_PROJECT_NAME}_test" ) +project( ${TEST_NAME} ) +add_executable( ${TEST_NAME} diff_match_patch_test.cpp diff_match_patch_test.h diff_match_patch_test_utils.cpp) + +target_include_directories( ${TEST_NAME} PUBLIC ${CMAKE_SOURCE_DIR}) +if( USE_GTEST ) + SET( GTEST_LIBS gtest gmock ) + target_include_directories( ${_PROJECT_NAME} PUBLIC ${GOOGLETEST_ROOT_DIR}/googletest/include ) + target_compile_definitions( ${_PROJECT_NAME} PUBLIC USE_GTEST ) + target_compile_definitions( ${TEST_NAME} PUBLIC USE_GTEST ) +endif() + +target_link_libraries( diff_match_patch_cpp17_test ${_PROJECT_NAME} ${GTEST_LIBS}) +add_test( ${TEST_NAME} ${TEST_NAME} ) diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp new file mode 100644 index 00000000..b410845f --- /dev/null +++ b/cpp17/diff_match_patch.cpp @@ -0,0 +1,2396 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. 
+ * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "diff_match_patch.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "diff_match_patch_utils.h" + +namespace NDiffMatchPatch +{ +#ifdef WIN32 + static std::size_t kZERO{ 0ULL }; + static std::size_t kONE{ 1ULL }; +#else + static unsigned long kZERO{ 0UL }; + static unsigned long kONE{ 1UL }; +#endif + + std::wstring toString( EOperation op ) + { + switch ( op ) + { + case EOperation::eINSERT: + return L"INSERT"; + case EOperation::eDELETE: + return L"DELETE"; + case EOperation::eEQUAL: + return L"EQUAL"; + } + throw "Invalid operation."; + } + + ////////////////////////// + // + // Diff Class + // + ////////////////////////// + + /** + * Constructor. Initializes the diff with the provided values. + * @param operation One of INSERT, DELETE or EQUAL + * @param text The text being applied + */ + + /** + * Display a human-readable version of this Diff. 
+ * @return text version + */ + std::wstring Diff::toString( EStringType stringType ) const + { + std::wstring retVal; + if ( stringType == EStringType::ePatch ) + { + switch ( fOperation ) + { + case EOperation::eINSERT: + retVal += L"+"; + break; + case EOperation::eDELETE: + retVal += L"-"; + break; + case EOperation::eEQUAL: + retVal += L" "; + break; + } + retVal += NUtils::toPercentEncoding( fText, L" !~*'();/?:@&=+$,#" ) + std::wstring( L"\n" ); + } + else if ( stringType == EStringType::eUnitTest ) + { + retVal = L"(" + NDiffMatchPatch::toString( fOperation ) + L" " + text() + L")"; + } + else if ( stringType == EStringType::eDefault ) + { + retVal = fText; + // Replace linebreaks with Pilcrow signs. + std::replace( retVal.begin(), retVal.end(), L'\n', L'\u00b6' ); + retVal = std::wstring( L"Diff(" ) + NDiffMatchPatch::toString( fOperation ) + std::wstring( L",\"" ) + retVal + std::wstring( L"\")" ); + } + return retVal; + } + + std::wstring Diff::toHtml() const + { + auto text = this->text(); + NUtils::replace( text, L"&", L"&" ); + NUtils::replace( text, L"<", L"<" ); + NUtils::replace( text, L">", L">" ); + NUtils::replace( text, L"\n", L"¶
" ); + if ( isInsert() ) + return std::wstring( L"" ) + text + std::wstring( L"" ); + else if ( isDelete() ) + return std::wstring( L"" ) + text + std::wstring( L"" ); + else if ( isEqual() ) + return std::wstring( L"" ) + text + std::wstring( L"" ); + return {}; + } + + std::wstring Diff::toConsole() const + { + static std::wstring kRed{ L"\033[0;31m" }; + static std::wstring kGreen{ L"\033[0;32m" }; + static std::wstring kYellow{ L"\033[0;33m" }; + static std::wstring kReset{ L"\033[m" }; + static std::wstring kEOL{ NUtils::fromPercentEncoding( L"%C2%B6" ) + L"\n" }; + + auto retVal = text(); + NUtils::replace( retVal, L"\n", kEOL ); + if ( isInsert() ) + return kGreen + retVal + kReset; + else if ( isDelete() ) + return kRed + retVal + kReset; + else if ( isEqual() ) + return retVal; + return {}; + } + + std::wstring Diff::toDelta() const + { + if ( isInsert() ) + return L"+" + NUtils::toPercentEncoding( text(), L" !~*'();/?:@&=+$,#" ) + L"\t"; + else if ( isDelete() ) + return L"-" + std::to_wstring( text().length() ) + L"\t"; + else if ( isEqual() ) + return L"=" + std::to_wstring( text().length() ) + L"\t"; + return {}; + } + + /** + * Is this Diff equivalent to another Diff? + * @param d Another Diff to compare against + * @return true or false + */ + bool Diff::operator==( const Diff &d ) const + { + return ( d.fOperation == fOperation ) && ( d.fText == fText ); + } + + bool Diff::operator!=( const Diff &d ) const + { + return !( operator==( d ) ); + } + + ///////////////////////////////////////////// + // + // Patch Class + // + ///////////////////////////////////////////// + + /** + * Constructor. Initializes with an empty list of diffs. 
+ */ + Patch::Patch() + { + } + + Patch::Patch( std::wstring &text ) + { + std::wsmatch matches; + auto patchHeader = std::wregex( LR"(^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$)" ); + if ( !std::regex_match( text, matches, patchHeader ) || ( matches.size() != 5 ) ) + { + throw std::wstring( L"Invalid patch string: " + text ); + } + start1 = NUtils::toInt( matches[ 1 ].str() ); + if ( !matches[ 2 ].length() ) + { + start1--; + length1 = 1; + } + else if ( matches[ 2 ].str() == L"0" ) + { + length1 = 0; + } + else + { + start1--; + length1 = NUtils::toInt( matches[ 2 ].str() ); + } + + start2 = NUtils::toInt( matches[ 3 ].str() ); + if ( !matches[ 4 ].length() ) + { + start2--; + length2 = 1; + } + else if ( matches[ 4 ].str() == L"0" ) + { + length2 = 0; + } + else + { + start2--; + length2 = NUtils::toInt( matches[ 4 ].str() ); + } + text.erase( text.begin() ); + } + + bool Patch::isNull() const + { + if ( start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 && diffs.empty() ) + { + return true; + } + return false; + } + + /** + * Emulate GNU diff's format. + * Header: @@ -382,8 +481,9 @@ + * Indices are printed as 1-based, not 0-based. + * @return The GNU diff string + */ + std::wstring Patch::toString() const + { + auto text = getPatchHeader(); + // Escape the body of the patch with %xx notation. 
+ for ( auto &&aDiff : diffs ) + { + text += aDiff.toString( EStringType::ePatch ); + } + + return text; + } + + std::wstring Patch::getPatchHeader() const + { + auto coords1 = getCoordinateString( start1, length1 ); + auto coords2 = getCoordinateString( start2, length2 ); + auto text = std::wstring( L"@@ -" ) + coords1 + std::wstring( L" +" ) + coords2 + std::wstring( L" @@\n" ); + return text; + } + + std::wstring Patch::getCoordinateString( std::size_t start, std::size_t length ) const + { + std::wstring retVal; + if ( length == 0 ) + { + retVal = std::to_wstring( start ) + std::wstring( L",0" ); + } + else if ( length == 1 ) + { + retVal = std::to_wstring( start + 1 ); + } + else + { + retVal = std::to_wstring( start + 1 ) + std::wstring( L"," ) + std::to_wstring( length ); + } + return retVal; + } + + ///////////////////////////////////////////// + // + // diff_match_patch Class + // + ///////////////////////////////////////////// + + // all class members initialized in the class + diff_match_patch::diff_match_patch() + { + } + + TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2 ) + { + return diff_main( text1, text2, true ); + } + + TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines ) + { + // Set a deadline by which time the diff must be complete. + clock_t deadline; + if ( Diff_Timeout <= 0 ) + { + deadline = std::numeric_limits< clock_t >::max(); + } + else + { + deadline = clock() + (clock_t)( Diff_Timeout * CLOCKS_PER_SEC ); + } + return diff_main( text1, text2, checklines, deadline ); + } + + TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ) + { + // Check for equality (speedup). 
+ TDiffVector diffs; + if ( text1 == text2 ) + { + if ( !text1.empty() ) + { + diffs.emplace_back( EOperation::eEQUAL, text1 ); + } + return diffs; + } + + if ( !text1.empty() && text2.empty() ) + { + diffs.emplace_back( EOperation::eDELETE, text1 ); + return diffs; + } + + if ( text1.empty() && !text2.empty() ) + { + diffs.emplace_back( EOperation::eINSERT, text2 ); + return diffs; + } + + // Trim off common prefix (speedup). + auto commonlength = diff_commonPrefix( text1, text2 ); + auto commonprefix = text1.substr( 0, commonlength ); + auto textChopped1 = text1.substr( commonlength ); + auto textChopped2 = text2.substr( commonlength ); + + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix( textChopped1, textChopped2 ); + auto commonsuffix = textChopped1.substr( textChopped1.length() - commonlength ); + textChopped1 = textChopped1.substr( 0, textChopped1.length() - commonlength ); + textChopped2 = textChopped2.substr( 0, textChopped2.length() - commonlength ); + + // Compute the diff on the middle block. + diffs = diff_compute( textChopped1, textChopped2, checklines, deadline ); + + // Restore the prefix and suffix. 
+ if ( !commonprefix.empty() ) + { + diffs.emplace( diffs.begin(), EOperation::eEQUAL, commonprefix ); + } + if ( !commonsuffix.empty() ) + { + diffs.emplace_back( EOperation::eEQUAL, commonsuffix ); + } + + diff_cleanupMerge( diffs ); + + return diffs; + } + + TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2 ) + { + return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2, bool checklines ) + { + return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines ); + } + + TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) + { + return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines, deadline ); + } + + TDiffVector diff_match_patch::diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ) + { + TDiffVector diffs; + + if ( text1.empty() ) + { + // Just add some text (speedup). + diffs.emplace_back( EOperation::eINSERT, text2 ); + return diffs; + } + + if ( text2.empty() ) + { + // Just delete some text (speedup). + diffs.emplace_back( EOperation::eDELETE, text1 ); + return diffs; + } + + { + auto [ longtext, shorttext ] = ( text1.length() > text2.length() ) ? std::make_pair( text1, text2 ) : std::make_pair( text2, text1 ); + auto i = longtext.find( shorttext ); + if ( i != std::string::npos ) + { + // Shorter text is inside the longer text (speedup). + const auto op = ( text1.length() > text2.length() ) ? EOperation::eDELETE : EOperation::eINSERT; + diffs.emplace_back( op, longtext.substr( 0, i ) ); + diffs.emplace_back( EOperation::eEQUAL, shorttext ); + diffs.emplace_back( op, safeMid( longtext, i + shorttext.length() ) ); + return diffs; + } + + if ( shorttext.length() == 1 ) + { + // Single character string. 
+ // After the previous speedup, the character can't be an equality. + diffs.emplace_back( EOperation::eDELETE, text1 ); + diffs.emplace_back( EOperation::eINSERT, text2 ); + return diffs; + } + // Garbage collect longtext and shorttext by scoping out. + } + + // Check to see if the problem can be split in two. + const TStringVector hm = diff_halfMatch( text1, text2 ); + if ( !hm.empty() ) + { + // A half-match was found, sort out the return data. + auto &&text1_a = hm[ 0 ]; + auto &&text1_b = hm[ 1 ]; + auto &&text2_a = hm[ 2 ]; + auto &&text2_b = hm[ 3 ]; + auto &&mid_common = hm[ 4 ]; + // Send both pairs off for separate processing. + diffs = diff_main( text1_a, text2_a, checklines, deadline ); + const auto &&diffs_b = diff_main( text1_b, text2_b, checklines, deadline ); + // Merge the results. + diffs.emplace_back( EOperation::eEQUAL, mid_common ); + diffs.insert( diffs.end(), diffs_b.begin(), diffs_b.end() ); + return diffs; + } + + // Perform a real diff. + if ( checklines && ( text1.length() > 100 ) && ( text2.length() > 100 ) ) + { + return diff_lineMode( text1, text2, deadline ); + } + + return diff_bisect( text1, text2, deadline ); + } + + TDiffVector diff_match_patch::diff_compute( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) + { + return diff_compute( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines, deadline ); + } + + TDiffVector diff_match_patch::diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline ) + { + // Scan the text on a line-by-line basis first. + auto a = diff_linesToChars( text1, text2 ); + text1 = std::get< std::wstring >( a[ 0 ] ); + text2 = std::get< std::wstring >( a[ 1 ] ); + auto linearray = std::get< TStringVector >( a[ 2 ] ); + + auto diffs = diff_main( text1, text2, false, deadline ); + + // Convert the diff back to original text. + diff_charsToLines( diffs, linearray ); + // Eliminate freak matches (e.g. 
blank lines) + diff_cleanupSemantic( diffs ); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.emplace_back( EOperation::eEQUAL, L"" ); + std::size_t pointer = 0; + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete; + std::wstring text_insert; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer ].isInsert() ) + { + count_insert++; + text_insert += diffs[ pointer ].text(); + } + else if ( diffs[ pointer ].isDelete() ) + { + count_delete++; + text_delete += diffs[ pointer ].text(); + } + else if ( diffs[ pointer ].isEqual() ) + { // Upon reaching an equality, check for prior redundancies. + if ( count_delete >= 1 && count_insert >= 1 ) + { + // Delete the offending records and add the merged ones. + auto numElements = count_delete + count_insert; + auto start = diffs.begin() + pointer - numElements; + auto end = start + numElements; + diffs.erase( start, end ); + pointer = pointer - count_delete - count_insert; + auto subDiff = diff_main( text_delete, text_insert, false, deadline ); + diffs.insert( diffs.begin() + pointer, subDiff.begin(), subDiff.end() ); + pointer = pointer + subDiff.size(); + } + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + } + pointer++; + } + diffs.pop_back(); // Remove the dummy entry at the end. + + return diffs; + } + + TDiffVector diff_match_patch::diff_lineMode( std::string text1, std::string text2, clock_t deadline ) + { + return diff_lineMode( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), deadline ); + } + + // using int64_t rather thant size_t due to the backward walking nature of the + // algorithm + TDiffVector diff_match_patch::diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline ) + { + // Cache the text lengths to prevent multiple calls. 
+ auto text1_length = static_cast< int64_t >( text1.length() ); + auto text2_length = static_cast< int64_t >( text2.length() ); + auto max_d = ( text1_length + text2_length + 1 ) / 2; + auto v_offset = max_d; + auto v_length = 2 * max_d; + auto v1 = std::vector< int64_t >( v_length, -1 ); + auto v2 = std::vector< int64_t >( v_length, -1 ); + v1[ v_offset + 1 ] = 0; + v2[ v_offset + 1 ] = 0; + auto delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + bool front = ( delta % 2 != 0 ); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + int64_t k1start = 0; + int64_t k1end = 0; + int64_t k2start = 0; + int64_t k2end = 0; + for ( int64_t d = 0; d < max_d; d++ ) + { + // Bail out if deadline is reached. + if ( clock() > deadline ) + { + break; + } + + // Walk the front path one step. + for ( auto k1 = -d + k1start; k1 <= d - k1end; k1 += 2 ) + { + auto k1_offset = v_offset + k1; + int64_t x1; + if ( ( k1 == -d ) || ( k1 != d ) && ( v1[ k1_offset - 1 ] < v1[ k1_offset + 1 ] ) ) + { + x1 = v1[ k1_offset + 1 ]; + } + else + { + x1 = v1[ k1_offset - 1 ] + 1; + } + int64_t y1 = x1 - k1; + while ( ( x1 < text1_length ) && ( y1 < text2_length ) && ( text1[ x1 ] == text2[ y1 ] ) ) + { + x1++; + y1++; + } + v1[ k1_offset ] = x1; + if ( x1 > text1_length ) + { + // Ran off the right of the graph. + k1end += 2; + } + else if ( y1 > text2_length ) + { + // Ran off the bottom of the graph. + k1start += 2; + } + else if ( front ) + { + auto k2_offset = v_offset + delta - k1; + if ( ( k2_offset >= 0 ) && ( k2_offset < v_length ) && ( v2[ k2_offset ] != -1 ) ) + { + // Mirror x2 onto top-left coordinate system. + auto x2 = text1_length - v2[ k2_offset ]; + if ( x1 >= x2 ) + { + // Overlap detected. + return diff_bisectSplit( text1, text2, x1, y1, deadline ); + } + } + } + } + + // Walk the reverse path one step. 
+ for ( auto k2 = -d + k2start; k2 <= d - k2end; k2 += 2 ) + { + auto k2_offset = v_offset + k2; + int64_t x2; + if ( ( k2 == -d ) || ( k2 != d ) && ( v2[ k2_offset - 1 ] < v2[ k2_offset + 1 ] ) ) + { + x2 = v2[ k2_offset + 1 ]; + } + else + { + x2 = v2[ k2_offset - 1 ] + 1; + } + auto y2 = x2 - k2; + while ( ( x2 < text1_length ) && ( y2 < text2_length ) && ( text1[ text1_length - x2 - 1 ] == text2[ text2_length - y2 - 1 ] ) ) + { + x2++; + y2++; + } + v2[ k2_offset ] = x2; + if ( x2 > text1_length ) + { + // Ran off the left of the graph. + k2end += 2; + } + else if ( y2 > text2_length ) + { + // Ran off the top of the graph. + k2start += 2; + } + else if ( !front ) + { + auto k1_offset = v_offset + delta - k2; + if ( ( k1_offset >= 0 ) && ( k1_offset < v_length ) && ( v1[ k1_offset ] != -1 ) ) + { + auto x1 = v1[ k1_offset ]; + auto y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - v2[ k2_offset ]; + if ( x1 >= x2 ) + { + // Overlap detected. + return diff_bisectSplit( text1, text2, x1, y1, deadline ); + } + } + } + } + } + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + auto diffs = TDiffVector( { Diff( EOperation::eDELETE, text1 ), Diff( EOperation::eINSERT, text2 ) } ); + return diffs; + } + + TDiffVector diff_match_patch::diff_bisect( const std::string &text1, const std::string &text2, clock_t deadline ) + { + return diff_bisect( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), deadline ); + } + + TDiffVector diff_match_patch::diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline ) + { + auto text1a = text1.substr( 0, x ); + auto text2a = text2.substr( 0, y ); + auto text1b = safeMid( text1, x ); + auto text2b = safeMid( text2, y ); + + // Compute both diffs serially. 
+ TDiffVector diffs = diff_main( text1a, text2a, false, deadline ); + TDiffVector diffsb = diff_main( text1b, text2b, false, deadline ); + + diffs.insert( diffs.end(), diffsb.begin(), diffsb.end() ); + return diffs; + } + + TDiffVector diff_match_patch::diff_bisectSplit( const std::string &text1, const std::string &text2, std::size_t x, std::size_t y, clock_t deadline ) + { + return diff_bisectSplit( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), x, y, deadline ); + } + + diff_match_patch::TVariantVector diff_match_patch::diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ) + { + TStringVector lineArray; + std::unordered_map< std::wstring, std::size_t > lineHash; + // e.g. linearray[4] == "Hello\n" + // e.g. linehash.get("Hello\n") == 4 + + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a nullptr character. + lineArray.emplace_back( L"" ); + + const std::wstring chars1 = diff_linesToCharsMunge( text1, lineArray, lineHash ); + const std::wstring chars2 = diff_linesToCharsMunge( text2, lineArray, lineHash ); + + TVariantVector listRet; + listRet.emplace_back( chars1 ); + listRet.emplace_back( chars2 ); + listRet.emplace_back( lineArray ); + return listRet; + } + + std::vector< diff_match_patch::diff_match_patch::TVariant > diff_match_patch::diff_linesToChars( const std::string &text1, const std::string &text2 ) + { + return diff_linesToChars( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + std::wstring diff_match_patch::diff_linesToCharsMunge( const std::wstring &text, TStringVector &lineArray, std::unordered_map< std::wstring, std::size_t > &lineHash ) + { + std::size_t lineStart = 0; + std::size_t lineEnd = std::string::npos; + std::wstring line; + std::wstring chars; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. 
+ // Modifying text would create many large strings to garbage collect. + bool firstTime = true; + while ( ( firstTime && ( lineEnd == -1 ) && !text.empty() ) || lineEnd < ( text.length() - 1 ) ) + { + firstTime = false; + lineEnd = text.find( '\n', lineStart ); + if ( lineEnd == -1 ) + { + lineEnd = text.length() - 1; + } + line = safeMid( text, lineStart, lineEnd + 1 - lineStart ); + + auto pos = lineHash.find( line ); + if ( pos != lineHash.end() ) + { + chars += static_cast< wchar_t >( ( *pos ).second ); + } + else + { + lineArray.emplace_back( line ); + lineHash[ line ] = lineArray.size() - 1; + chars += static_cast< wchar_t >( lineArray.size() - 1 ); + } + + lineStart = lineEnd + 1; + } + return chars; + } + + void diff_match_patch::diff_charsToLines( TDiffVector &diffs, const TStringVector &lineArray ) + { + for ( auto &&diff : diffs ) + { + std::wstring text; + for ( auto &&y : diff.text() ) + { + text += lineArray[ y ]; + } + diff.fText = text; + } + } + + std::size_t diff_match_patch::diff_commonPrefix( const std::wstring &text1, const std::wstring &text2 ) + { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const auto n = std::min( text1.length(), text2.length() ); + for ( std::size_t i = 0; i < n; i++ ) + { + if ( text1[ i ] != text2[ i ] ) + { + return i; + } + } + return n; + } + + std::size_t diff_match_patch::diff_commonPrefix( const std::string &text1, const std::string &text2 ) + { + return diff_commonPrefix( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + std::size_t diff_match_patch::diff_commonSuffix( const std::wstring &text1, const std::wstring &text2 ) + { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const auto text1_length = text1.length(); + const auto text2_length = text2.length(); + const auto n = std::min( text1_length, text2_length ); + for ( std::size_t i = 1; i <= n; i++ ) + { + if ( text1[ text1_length - i ] != text2[ text2_length - i ] ) + { + return i - 1; + } + } 
+ return n; + } + + std::size_t diff_match_patch::diff_commonSuffix( const std::string &text1, const std::string &text2 ) + { + return diff_commonSuffix( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + std::size_t diff_match_patch::diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 ) + { + // Cache the text lengths to prevent multiple calls. + const auto text1_length = text1.length(); + const auto text2_length = text2.length(); + // Eliminate the nullptr case. + if ( text1_length == 0 || text2_length == 0 ) + { + return 0; + } + // Truncate the longer string. + std::wstring text1_trunc = text1; + std::wstring text2_trunc = text2; + if ( text1_length > text2_length ) + { + text1_trunc = text1.substr( text1_length - text2_length ); + } + else if ( text1_length < text2_length ) + { + text2_trunc = text2.substr( 0, text1_length ); + } + const auto text_length = std::min( text1_length, text2_length ); + // Quick check for the worst case. + if ( text1_trunc == text2_trunc ) + { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. + // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + std::size_t best = 0; + std::size_t length = 1; + while ( true ) + { + std::wstring pattern = ( length < text1_trunc.length() ) ? 
text1_trunc.substr( text_length - length ) : std::wstring(); + if ( pattern.empty() ) + return best; + + auto found = text2_trunc.find( pattern ); + if ( found == std::string::npos ) + { + return best; + } + length += found; + if ( found == 0 || text1_trunc.substr( text_length - length ) == text2_trunc.substr( 0, length ) ) + { + best = length; + length++; + } + } + } + + std::size_t diff_match_patch::diff_commonOverlap( const std::string &text1, const std::string &text2 ) + { + return diff_commonOverlap( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ) + { + if ( Diff_Timeout <= 0 ) + { + // Don't risk returning a non-optimal diff if we have unlimited time. + return {}; + } + const std::wstring longtext = text1.length() > text2.length() ? text1 : text2; + const std::wstring shorttext = text1.length() > text2.length() ? text2 : text1; + if ( ( longtext.length() < 4 ) || ( ( shorttext.length() * 2 ) < longtext.length() ) ) + { + return {}; // Pointless. + } + + // First check if the second quarter is the seed for a half-match. + const TStringVector hm1 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 3 ) / 4 ); + // Check again based on the third quarter. + const TStringVector hm2 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 1 ) / 2 ); + TStringVector hm; + if ( hm1.empty() && hm2.empty() ) + { + return {}; + } + else if ( hm2.empty() ) + { + hm = hm1; + } + else if ( hm1.empty() ) + { + hm = hm2; + } + else + { + // Both matched. Select the longest. + hm = hm1[ 4 ].length() > hm2[ 4 ].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. 
+ if ( text1.length() > text2.length() ) + { + return hm; + } + else + { + TStringVector listRet( { hm[ 2 ], hm[ 3 ], hm[ 0 ], hm[ 1 ], hm[ 4 ] } ); + return listRet; + } + } + + diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( const std::string &text1, const std::string &text2 ) + { + return diff_halfMatch( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ) + { + // Start with a 1/4 length substring at position i as a seed. + const std::wstring seed = safeMid( longtext, i, longtext.length() / 4 ); + std::size_t j = std::string::npos; + std::wstring best_common; + std::wstring best_longtext_a, best_longtext_b; + std::wstring best_shorttext_a, best_shorttext_b; + while ( ( j = shorttext.find( seed, j + 1 ) ) != std::string::npos ) + { + const auto prefixLength = diff_commonPrefix( safeMid( longtext, i ), safeMid( shorttext, j ) ); + const auto suffixLength = diff_commonSuffix( longtext.substr( 0, i ), shorttext.substr( 0, j ) ); + if ( best_common.length() < suffixLength + prefixLength ) + { + best_common = safeMid( shorttext, j - suffixLength, suffixLength ) + safeMid( shorttext, j, prefixLength ); + best_longtext_a = longtext.substr( 0, i - suffixLength ); + best_longtext_b = safeMid( longtext, i + prefixLength ); + best_shorttext_a = shorttext.substr( 0, j - suffixLength ); + best_shorttext_b = safeMid( shorttext, j + prefixLength ); + } + } + if ( best_common.length() * 2 >= longtext.length() ) + { + TStringVector listRet( { best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common } ); + return listRet; + } + else + { + return {}; + } + } + + diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( const std::string &longtext, const std::string &shorttext, std::size_t i ) + { + return diff_halfMatchI( NUtils::to_wstring( longtext ), NUtils::to_wstring( 
shorttext ), i ); + } + + void diff_match_patch::diff_cleanupSemantic( TDiffVector &diffs ) + { + if ( diffs.empty() ) + return; + + bool changes = false; + // Stack of indices where equalities are found. + std::stack< std::size_t > equalities; // stack of equalities + // Always equal to equalities[equalitiesLength-1][1] + std::wstring lastEquality; + std::size_t pointer = 0; // Index of current position. + // Number of characters that changed prior to the equality. + std::size_t length_insertions1 = 0; + std::size_t length_deletions1 = 0; + // Number of characters that changed after the equality. + std::size_t length_insertions2 = 0; + std::size_t length_deletions2 = 0; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer ].isEqual() ) + { // Equality found. + equalities.push( pointer ); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastEquality = diffs[ pointer ].text(); + } + else + { // an insertion or deletion + if ( diffs[ pointer ].isInsert() ) + { + length_insertions2 += diffs[ pointer ].text().length(); + } + else + { + length_deletions2 += diffs[ pointer ].text().length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if ( !lastEquality.empty() && ( lastEquality.length() <= std::max( length_insertions1, length_deletions1 ) ) && ( lastEquality.length() <= std::max( length_insertions2, length_deletions2 ) ) ) + { + // Duplicate record. + diffs.insert( diffs.begin() + equalities.top(), Diff( EOperation::eDELETE, lastEquality ) ); + // Change second copy to insert. + diffs[ equalities.top() + 1 ].fOperation = EOperation::eINSERT; + // Throw away the equality we just deleted. + equalities.pop(); + if ( !equalities.empty() ) + { + equalities.pop(); + } + pointer = !equalities.empty() ? equalities.top() : -1; + length_insertions1 = 0; // Reset the counters. 
+ length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastEquality.clear(); + changes = true; + } + } + pointer++; + } + + // Normalize the diff. + if ( changes ) + { + diff_cleanupMerge( diffs ); + } + diff_cleanupSemanticLossless( diffs ); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = 1; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer - 1 ].isDelete() && diffs[ pointer ].isInsert() ) + { + auto deletion = diffs[ pointer - 1 ].text(); + auto insertion = diffs[ pointer ].text(); + std::size_t overlap_length1 = diff_commonOverlap( deletion, insertion ); + std::size_t overlap_length2 = diff_commonOverlap( insertion, deletion ); + if ( overlap_length1 >= overlap_length2 ) + { + if ( overlap_length1 >= deletion.length() / 2.0 || overlap_length1 >= insertion.length() / 2.0 ) + { + // Overlap found. + // Insert an equality and trim the surrounding edits. + diffs.emplace( diffs.begin() + pointer, EOperation::eEQUAL, insertion.substr( 0, overlap_length1 ) ); + diffs[ pointer - 1 ].fText = deletion.substr( 0, deletion.length() - overlap_length1 ); + diffs[ pointer + 1 ].fText = insertion.substr( overlap_length1 ); + pointer++; + } + } + else + { + if ( overlap_length2 >= deletion.length() / 2.0 || overlap_length2 >= insertion.length() / 2.0 ) + { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. 
+ diffs.emplace( diffs.begin() + pointer, EOperation::eEQUAL, deletion.substr( 0, overlap_length2 ) ); + diffs[ pointer - 1 ].fOperation = EOperation::eINSERT; + diffs[ pointer - 1 ].fText = insertion.substr( 0, insertion.length() - overlap_length2 ); + diffs[ pointer + 1 ].fOperation = EOperation::eDELETE; + diffs[ pointer + 1 ].fText = deletion.substr( overlap_length2 ); + pointer++; + } + } + pointer++; + } + pointer++; + } + } + + void diff_match_patch::diff_cleanupSemanticLossless( TDiffVector &diffs ) + { + int pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while ( ( pointer != -1 ) && !diffs.empty() && ( pointer < ( diffs.size() - 1 ) ) ) + { + if ( diffs[ pointer - 1 ].isEqual() && diffs[ pointer + 1 ].isEqual() ) + { + // This is a single edit surrounded by equalities. + auto equality1 = diffs[ pointer - 1 ].text(); + auto edit = diffs[ pointer ].text(); + auto equality2 = diffs[ pointer + 1 ].text(); + + // First, shift the edit as far left as possible. + auto commonOffset = diff_commonSuffix( equality1, edit ); + if ( commonOffset > 0 ) + { + auto commonString = safeMid( edit, edit.length() - commonOffset ); + equality1 = equality1.substr( 0, equality1.length() - commonOffset ); + edit = commonString + edit.substr( 0, edit.length() - commonOffset ); + equality2 = commonString + equality2; + } + + // Second, step character by character right, + // looking for the best fit. 
+ auto bestEquality1 = equality1; + auto bestEdit = edit; + auto bestEquality2 = equality2; + auto bestScore = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); + while ( !edit.empty() && !equality2.empty() && edit[ 0 ] == equality2[ 0 ] ) + { + equality1 += edit[ 0 ]; + edit = edit.substr( 1 ) + equality2[ 0 ]; + equality2 = equality2.substr( 1 ); + auto score = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); + // The >= encourages trailing rather than leading whitespace on + // edits. + if ( score >= bestScore ) + { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } + + if ( diffs[ pointer - 1 ].text() != bestEquality1 ) + { + // We have an improvement, save it back to the diff. + if ( !bestEquality1.empty() ) + { + diffs[ pointer - 1 ].fText = bestEquality1; + } + else + { + diffs.erase( diffs.begin() + pointer - 1 ); + pointer--; + } + diffs[ pointer ].fText = bestEdit; + if ( !bestEquality2.empty() ) + { + diffs[ pointer + 1 ].fText = bestEquality2; + } + else + { + diffs.erase( diffs.begin() + pointer + 1 ); + pointer--; + } + } + } + pointer++; + } + } + + int64_t diff_match_patch::diff_cleanupSemanticScore( const std::wstring &one, const std::wstring &two ) + { + if ( one.empty() || two.empty() ) + { + // Edges are the best. + return 6; + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. 
+ auto char1 = one[ one.length() - 1 ]; + auto char2 = two[ 0 ]; + bool nonAlphaNumeric1 = !std::iswalnum( char1 ); + bool nonAlphaNumeric2 = !std::iswalnum( char2 ); + bool whitespace1 = nonAlphaNumeric1 && std::iswspace( char1 ); + bool whitespace2 = nonAlphaNumeric2 && std::iswspace( char2 ); + bool lineBreak1 = whitespace1 && std::iswcntrl( char1 ); + bool lineBreak2 = whitespace2 && std::iswcntrl( char2 ); + bool blankLine1 = lineBreak1 && std::regex_search( one, BLANKLINEEND ); + bool blankLine2 = lineBreak2 && std::regex_search( two, BLANKLINESTART ); + + if ( blankLine1 || blankLine2 ) + { + // Five points for blank lines. + return 5; + } + else if ( lineBreak1 || lineBreak2 ) + { + // Four points for line breaks. + return 4; + } + else if ( nonAlphaNumeric1 && !whitespace1 && whitespace2 ) + { + // Three points for end of sentences. + return 3; + } + else if ( whitespace1 || whitespace2 ) + { + // Two points for whitespace. + return 2; + } + else if ( nonAlphaNumeric1 || nonAlphaNumeric2 ) + { + // One point for non-alphanumeric. + return 1; + } + return 0; + } + + int64_t diff_match_patch::diff_cleanupSemanticScore( const std::string &one, const std::string &two ) + { + return diff_cleanupSemanticScore( NUtils::to_wstring( one ), NUtils::to_wstring( two ) ); + } + + // Define some regex patterns for matching boundaries. + std::wregex diff_match_patch::BLANKLINEEND = std::wregex( LR"(\n\r?\n$)" ); + std::wregex diff_match_patch::BLANKLINESTART = std::wregex( LR"(^\r?\n\r?\n)" ); + + void diff_match_patch::diff_cleanupEfficiency( TDiffVector &diffs ) + { + bool changes = false; + // Stack of indices where equalities are found. + std::stack< std::size_t > equalities; + // Always equal to equalities[equalitiesLength-1][1] + std::wstring lastEquality; + std::size_t pointer = 0; // Index of current position. + // Is there an insertion operation before the last equality. + bool pre_ins = false; + // Is there a deletion operation before the last equality. 
+ bool pre_del = false; + // Is there an insertion operation after the last equality. + bool post_ins = false; + // Is there a deletion operation after the last equality. + bool post_del = false; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer ].isEqual() ) + { // Equality found. + if ( diffs[ pointer ].text().length() < Diff_EditCost && ( post_ins || post_del ) ) + { + // Candidate found. + equalities.push( pointer ); + pre_ins = post_ins; + pre_del = post_del; + lastEquality = diffs[ pointer ].text(); + } + else + { + // Not a candidate, and can never become one. + equalities = {}; + lastEquality.clear(); + } + post_ins = post_del = false; + } + else + { // An insertion or deletion. + if ( diffs[ pointer ].isDelete() ) + { + post_del = true; + } + else + { + post_ins = true; + } + /* + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + */ + if ( ( lastEquality.length() != 0 ) && ( ( pre_ins && pre_del && post_ins && post_del ) || ( ( lastEquality.length() < Diff_EditCost / 2 ) && ( ( pre_ins ? 1 : 0 ) + ( pre_del ? 1 : 0 ) + ( post_ins ? 1 : 0 ) + ( post_del ? 1 : 0 ) ) == 3 ) ) ) + { + // Duplicate record. + diffs.emplace( diffs.begin() + equalities.top(), EOperation::eDELETE, lastEquality ); + // Change second copy to insert. + diffs[ equalities.top() + 1 ].fOperation = EOperation::eINSERT; + equalities.pop(); // Throw away the equality we just deleted. + lastEquality.clear(); + if ( pre_ins && pre_del ) + { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalities = {}; + } + else + { + if ( !equalities.empty() ) + { + equalities.pop(); + } + + pointer = !equalities.empty() ? 
equalities.top() : -1; + post_ins = post_del = false; + } + changes = true; + } + } + pointer++; + } + + if ( changes ) + { + diff_cleanupMerge( diffs ); + } + } + + void diff_match_patch::diff_cleanupMerge( TDiffVector &diffs ) + { + diffs.emplace_back( EOperation::eEQUAL, L"" ); + int pointer = 0; + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete; + std::wstring text_insert; + + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer ].isInsert() ) + { + count_insert++; + text_insert += diffs[ pointer ].text(); + pointer++; + } + else if ( diffs[ pointer ].isDelete() ) + { + count_delete++; + text_delete += diffs[ pointer ].text(); + pointer++; + } + else if ( diffs[ pointer ].isEqual() ) + { + // Upon reaching an equality, check for prior redundancies. + if ( count_delete + count_insert > 1 ) + { + if ( count_delete != 0 && count_insert != 0 ) + { + // Factor out any common prefixies. + auto commonlength = diff_commonPrefix( text_insert, text_delete ); + if ( commonlength != 0 ) + { + if ( ( pointer > ( count_delete + count_insert ) ) && diffs[ pointer - ( count_delete + count_insert ) - 1 ].isEqual() ) + { + diffs[ pointer - count_delete - count_insert - 1 ].fText += text_insert.substr( 0, commonlength ); + } + else + { + diffs.emplace( diffs.begin(), EOperation::eEQUAL, text_insert.substr( 0, commonlength ) ); + pointer++; + } + text_insert = text_insert.substr( commonlength ); + text_delete = text_delete.substr( commonlength ); + } + // Factor out any common suffixies. + commonlength = diff_commonSuffix( text_insert, text_delete ); + if ( commonlength != 0 ) + { + diffs[ pointer ].fText = safeMid( text_insert, text_insert.length() - commonlength ) + diffs[ pointer ].text(); + text_insert = text_insert.substr( 0, text_insert.length() - commonlength ); + text_delete = text_delete.substr( 0, text_delete.length() - commonlength ); + } + } + // Delete the offending records and add the merged ones. 
+ pointer -= count_delete + count_insert; + NUtils::Splice( diffs, pointer, count_delete + count_insert ); + if ( !text_delete.empty() ) + { + NUtils::Splice( diffs, pointer, 0, { Diff( EOperation::eDELETE, text_delete ) } ); + pointer++; + } + if ( !text_insert.empty() ) + { + NUtils::Splice( diffs, pointer, 0, { Diff( EOperation::eINSERT, text_insert ) } ); + pointer++; + } + pointer++; + } + else if ( pointer != 0 && diffs[ pointer - 1 ].isEqual() ) + { + // Merge this equality with the previous one. + diffs[ pointer - 1 ].fText += diffs[ pointer ].text(); + diffs.erase( diffs.begin() + pointer ); + } + else + { + pointer++; + } + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + } + } + if ( diffs.back().text().empty() ) + { + diffs.pop_back(); // Remove the dummy entry at the end. + } + + // Second pass: look for single edits surrounded on both sides by + // equalities which can be shifted sideways to eliminate an equality. + // e.g: ABAC -> ABAC + bool changes = false; + pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while ( !diffs.empty() && pointer < ( diffs.size() - 1 ) ) + { + if ( diffs[ pointer - 1 ].isEqual() && diffs[ pointer + 1 ].isEqual() ) + { + // This is a single edit surrounded by equalities. + if ( NUtils::endsWith( diffs[ pointer ].text(), diffs[ pointer - 1 ].text() ) ) + { + // Shift the edit over the previous equality. + diffs[ pointer ].fText = diffs[ pointer - 1 ].text() + diffs[ pointer ].text().substr( 0, diffs[ pointer ].text().length() - diffs[ pointer - 1 ].text().length() ); + diffs[ pointer + 1 ].fText = diffs[ pointer - 1 ].text() + diffs[ pointer + 1 ].text(); + NUtils::Splice( diffs, pointer - 1, 1 ); + changes = true; + } + else if ( diffs[ pointer ].text().find( diffs[ pointer + 1 ].text() ) == 0 ) + { + // Shift the edit over the next equality. 
+ diffs[ pointer - 1 ].fText += diffs[ pointer + 1 ].text(); + diffs[ pointer ].fText = diffs[ pointer ].text().substr( diffs[ pointer + 1 ].text().length() ) + diffs[ pointer + 1 ].text(); + NUtils::Splice( diffs, pointer + 1, 1 ); + changes = true; + } + } + pointer++; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if ( changes ) + { + diff_cleanupMerge( diffs ); + } + } + std::size_t diff_match_patch::diff_xIndex( const TDiffVector &diffs, std::size_t loc ) + { + std::size_t chars1 = 0; + std::size_t chars2 = 0; + std::size_t last_chars1 = 0; + std::size_t last_chars2 = 0; + Diff lastDiff; + for ( auto &&aDiff : diffs ) + { + if ( !aDiff.isInsert() ) + { + // Equality or deletion. + chars1 += aDiff.text().length(); + } + if ( !aDiff.isDelete() ) + { + // Equality or insertion. + chars2 += aDiff.text().length(); + } + if ( chars1 > loc ) + { + // Overshot the location. + lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if ( lastDiff.isDelete() ) + { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. 
+ return last_chars2 + ( loc - last_chars1 ); + } + + std::wstring diff_match_patch::diff_prettyHtml( const TDiffVector &diffs ) + { + std::wstring html; + for ( auto &&aDiff : diffs ) + { + html += aDiff.toHtml(); + } + return html; + } + + std::wstring diff_match_patch::diff_prettyConsole( const TDiffVector &diffs ) + { + std::wstring retVal; + for ( auto &&aDiff : diffs ) + { + retVal += aDiff.toConsole(); + } + return retVal; + } + + std::wstring diff_match_patch::diff_text1( const TDiffVector &diffs ) + { + std::wstring text; + for ( auto &&aDiff : diffs ) + { + if ( !aDiff.isInsert() ) + { + text += aDiff.text(); + } + } + return text; + } + + std::wstring diff_match_patch::diff_text2( const TDiffVector &diffs ) + { + std::wstring text; + for ( auto &&aDiff : diffs ) + { + if ( !aDiff.isDelete() ) + { + text += aDiff.text(); + } + } + return text; + } + + std::size_t diff_match_patch::diff_levenshtein( const TDiffVector &diffs ) + { + std::size_t levenshtein = 0; + std::size_t insertions = 0; + std::size_t deletions = 0; + for ( auto &&aDiff : diffs ) + { + if ( aDiff.isInsert() ) + insertions += aDiff.text().length(); + else if ( aDiff.isDelete() ) + deletions += aDiff.text().length(); + else if ( aDiff.isEqual() ) + { + // A deletion and an insertion is one substitution. + levenshtein += std::max( insertions, deletions ); + insertions = 0; + deletions = 0; + } + } + levenshtein += std::max( insertions, deletions ); + return levenshtein; + } + + std::wstring diff_match_patch::diff_toDelta( const TDiffVector &diffs ) + { + std::wstring text; + for ( auto &&aDiff : diffs ) + { + text += aDiff.toDelta(); + } + if ( !text.empty() ) + { + // Strip off trailing tab character. 
+ text = text.substr( 0, text.length() - 1 ); + } + return text; + } + + /** + * Rebuild a diff list from the source text and a tab-separated delta string. + * A '+' token carries percent-encoded text to insert; '-' and '=' tokens carry a + * character count that is consumed from text1 as a deletion or an equality. + * @param text1 Source text the delta is applied against + * @param delta Tab-separated delta tokens + * @return The reconstructed diffs + * @throws std::wstring on a negative count, a count that runs past the end of text1, + * an unknown operation character, or a delta that does not consume all of text1 + */ + TDiffVector diff_match_patch::diff_fromDelta( const std::wstring &text1, const std::wstring &delta ) + { + TDiffVector diffs; + std::size_t pointer = 0; // Cursor in text1 + auto tokens = NUtils::splitString( delta, L"\t", false ); + for ( auto &&token : tokens ) + { + if ( token.empty() ) + { + // Blank tokens are ok (from a trailing \t). + continue; + } + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + std::wstring param = safeMid( token, 1 ); + switch ( token[ 0 ] ) + { + case '+': + NUtils::replace( param, L"+", L"%2b" ); + param = NUtils::fromPercentEncoding( param ); + diffs.emplace_back( EOperation::eINSERT, param ); + break; + case '-': + // Fall through. + case '=': + { + auto n = NUtils::toInt( param ); + if ( n < 0 ) + { + throw std::wstring( L"Negative number in diff_fromDelta: " + param ); + } + std::wstring text; + if ( ( pointer + n ) > text1.length() ) + { + throw std::wstring( L"Delta length (" + std::to_wstring( pointer + n ) + L") larger than source text length (" + std::to_wstring( text1.length() ) + L")." 
); + } + + text = safeMid( text1, pointer, n ); + pointer += n; + if ( token[ 0 ] == L'=' ) + { + diffs.emplace_back( EOperation::eEQUAL, text ); + } + else + { + diffs.emplace_back( EOperation::eDELETE, text ); + } + break; + } + default: + // Append to a std::wstring: adding the wchar_t directly to the literal would + // offset the literal's pointer instead of concatenating the character. + throw std::wstring( L"Invalid diff operation in diff_fromDelta: " ) + token[ 0 ]; + } + } + if ( pointer != text1.length() ) + { + throw std::wstring( L"Delta length (" ) + std::to_wstring( pointer ) + L") smaller than source text length (" + std::to_wstring( text1.length() ) + L")"; + } + return diffs; + } + + TDiffVector diff_match_patch::diff_fromDelta( const std::string &text1, const std::string &delta ) + { + return diff_fromDelta( NUtils::to_wstring( text1 ), NUtils::to_wstring( delta ) ); + } + + // MATCH FUNCTIONS + + std::size_t diff_match_patch::match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc ) + { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + loc = std::max( kZERO, std::min( loc, text.length() ) ); + if ( text == pattern ) + { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } + else if ( text.empty() ) + { + // Nothing to match. + return -1; + } + else if ( loc + pattern.length() <= text.length() && safeMid( text, loc, pattern.length() ) == pattern ) + { + // Perfect match at the perfect spot! (Includes case of nullptr pattern) + return loc; + } + else + { + // Do a fuzzy compare. 
+ return match_bitap( text, pattern, loc ); + } + } + + std::size_t diff_match_patch::match_main( const std::string &text, const std::string &pattern, std::size_t loc ) + { + return match_main( NUtils::to_wstring( text ), NUtils::to_wstring( pattern ), loc ); + } + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc', allowing errors. + * @param text The text to search + * @param pattern The pattern to search for + * @param loc The location to search around + * @return Best match index, or std::string::npos when no candidate scores within the threshold + * @throws const char * when Match_MaxBits is non-zero and the pattern is longer than Match_MaxBits + */ + std::size_t diff_match_patch::match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc ) + { + if ( !( Match_MaxBits == 0 || pattern.length() <= Match_MaxBits ) ) + { + throw "Pattern too long for this application."; + } + + // Initialise the alphabet. + auto &&s = match_alphabet( pattern ); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + auto best_loc = text.find( pattern, loc ); + if ( best_loc != std::string::npos ) + { + score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); + // What about in the other direction? (speedup) + auto start = std::min( loc + pattern.length(), text.length() ); + best_loc = text.rfind( pattern, start ); + if ( best_loc != std::string::npos ) + { + score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); + } + } + + // Initialise the bit arrays. + // The mask is built in 64 bits because rd/last_rd hold int64_t values: an int-width + // "1 << n" overflows (and then sign-extends) once the pattern reaches 32 characters. + // An empty pattern gets a zero mask instead of an out-of-range shift. + const int64_t matchmask = pattern.empty() ? 
0 : static_cast< int64_t >( 1ULL << ( pattern.length() - 1 ) ); + best_loc = std::string::npos; + + std::size_t bin_min, bin_mid; + auto bin_max = pattern.length() + text.length(); + std::vector< int64_t > rd; + std::vector< int64_t > last_rd; + for ( int d = 0; d < pattern.length(); d++ ) + { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. 
+ bin_min = 0; + bin_mid = bin_max; + while ( bin_min < bin_mid ) + { + if ( match_bitapScore( d, loc + bin_mid, loc, pattern ) <= score_threshold ) + { + bin_min = bin_mid; + } + else + { + bin_max = bin_mid; + } + bin_mid = ( bin_max - bin_min ) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. + bin_max = bin_mid; + auto start = std::max( kONE, ( loc > bin_mid ) ? ( loc - bin_mid + 1 ) : kZERO ); + auto finish = std::min( loc + bin_mid, text.length() ) + pattern.length(); + + rd = std::vector< int64_t >( finish + 2, 0 ); + // Low d bits set, computed in 64 bits for the same reason as matchmask. + rd[ finish + 1 ] = static_cast< int64_t >( ( 1ULL << d ) - 1 ); + for ( auto j = finish; ( j != -1 ) && ( j >= start ); j-- ) + { + int64_t charMatch; + if ( text.length() <= j - 1 ) + { + // Out of range. + charMatch = 0; + } + else + { + auto pos = s.find( text[ j - 1 ] ); + if ( pos == s.end() ) + charMatch = 0; + else + charMatch = ( *pos ).second; + } + if ( d == 0 ) + { + // First pass: exact match. + rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch; + } + else + { + // Subsequent passes: fuzzy match. + rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch | ( ( ( last_rd[ j + 1 ] | last_rd[ j ] ) << 1 ) | 1 ) | last_rd[ j + 1 ]; + } + if ( ( rd[ j ] & matchmask ) != 0 ) + { + double score = match_bitapScore( d, j - 1, loc, pattern ); + // This match will almost certainly be better than any existing + // match. But check anyway. + if ( score <= score_threshold ) + { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if ( best_loc > loc ) + { + // When passing loc, don't exceed our current distance from loc. + start = std::max( kONE, ( 2 * loc > best_loc ) ? 2 * loc - best_loc : 1 ); + } + else + { + // Already passed loc, downhill from here on in. + break; + } + } + } + } + if ( match_bitapScore( d + 1, loc, loc, pattern ) > score_threshold ) + { + // No hope for a (better) match at greater error levels. 
+ break; + } + last_rd = std::move( rd ); + } + return best_loc; + } + + std::size_t diff_match_patch::match_bitap( const std::string &text, const std::string &pattern, std::size_t loc ) + { + return match_bitap( NUtils::to_wstring( text ), NUtils::to_wstring( pattern ), loc ); + } + + double diff_match_patch::match_bitapScore( int64_t e, int64_t x, int64_t loc, const std::wstring &pattern ) + { + const float accuracy = static_cast< float >( e ) / pattern.length(); + const auto proximity = std::abs( loc - x ); + if ( Match_Distance == 0 ) + { + // Dodge divide by zero error. + return proximity == 0 ? accuracy : 1.0; + } + return accuracy + ( proximity / static_cast< float >( Match_Distance ) ); + } + + diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( const std::wstring &pattern ) + { + TCharPosMap s; + std::size_t i; + for ( i = 0; i < pattern.length(); i++ ) + { + auto c = pattern[ i ]; + s[ c ] = 0; + } + for ( i = 0; i < pattern.length(); i++ ) + { + auto c = pattern[ i ]; + auto pos = s.find( c ); + std::size_t prev = 0; + if ( pos != s.end() ) + prev = ( *pos ).second; + s[ c ] = prev | ( 1ULL << ( pattern.length() - i - 1 ) ); + } + return s; + } + + diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( const std::string &pattern ) + { + return match_alphabet( NUtils::to_wstring( pattern ) ); + } + + // PATCH FUNCTIONS + + void diff_match_patch::patch_addContext( Patch &patch, const std::wstring &text ) + { + if ( text.empty() ) + { + return; + } + std::wstring pattern = safeMid( text, patch.start2, patch.length1 ); + std::size_t padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while ( ( text.find( pattern ) != text.rfind( pattern ) ) && ( pattern.length() < ( Match_MaxBits - Patch_Margin - Patch_Margin ) ) ) + { + padding += Patch_Margin; + pattern = safeMid( text, std::max( kZERO, ( ( patch.start2 > padding ) ? 
patch.start2 - padding : 0UL ) ), std::min( text.length(), patch.start2 + patch.length1 + padding ) - std::max( kZERO, ( patch.start2 > padding ) ? patch.start2 - padding : 0 ) ); + } + // Add one chunk for good luck. + padding += Patch_Margin; + + // Add the prefix. + std::wstring prefix = safeMid( text, std::max( kZERO, ( ( patch.start2 > padding ) ? patch.start2 - padding : 0UL ) ), patch.start2 - std::max( kZERO, ( ( patch.start2 > padding ) ? patch.start2 - padding : 0UL ) ) ); + if ( !prefix.empty() ) + { + patch.diffs.emplace( patch.diffs.begin(), EOperation::eEQUAL, prefix ); + } + // Add the suffix. + std::wstring suffix = safeMid( text, patch.start2 + patch.length1, std::min( text.length(), patch.start2 + patch.length1 + padding ) - ( patch.start2 + patch.length1 ) ); + if ( !suffix.empty() ) + { + patch.diffs.emplace_back( EOperation::eEQUAL, suffix ); + } + + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths. + patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); + } + + void diff_match_patch::patch_addContext( Patch &patch, const std::string &text ) + { + return patch_addContext( patch, NUtils::to_wstring( text ) ); + } + + TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const std::wstring &text2 ) + { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + // No diffs provided, compute our own. + TDiffVector diffs = diff_main( text1, text2, true ); + if ( diffs.size() > 2 ) + { + diff_cleanupSemantic( diffs ); + diff_cleanupEfficiency( diffs ); + } + + return patch_make( text1, diffs ); + } + + TPatchVector diff_match_patch::patch_make( const TDiffVector &diffs ) + { + // No origin string provided, compute our own. 
+ const std::wstring text1 = diff_text1( diffs ); + return patch_make( text1, diffs ); + } + + TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const std::wstring & /*text2*/, const TDiffVector &diffs ) + { + // text2 is entirely unused. + return patch_make( text1, diffs ); + } + + TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const TDiffVector &diffs ) + { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + TPatchVector patches; + if ( diffs.empty() ) + { + return patches; // Get rid of the nullptr case. + } + Patch patch; + std::size_t char_count1 = 0; // Number of characters into the text1 string. + std::size_t char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + std::wstring prepatch_text = text1; + std::wstring postpatch_text = text1; + for ( auto &&aDiff : diffs ) + { + if ( patch.diffs.empty() && !aDiff.isEqual() ) + { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; + } + + if ( aDiff.isInsert() ) + { + patch.diffs.push_back( aDiff ); + patch.length2 += aDiff.text().length(); + postpatch_text = postpatch_text.substr( 0, char_count2 ) + aDiff.text() + safeMid( postpatch_text, char_count2 ); + } + else if ( aDiff.isDelete() ) + { + patch.length1 += aDiff.text().length(); + patch.diffs.push_back( aDiff ); + postpatch_text = postpatch_text.substr( 0, char_count2 ) + safeMid( postpatch_text, char_count2 + aDiff.text().length() ); + } + else if ( aDiff.isEqual() ) + { + if ( aDiff.text().length() <= 2 * Patch_Margin && !patch.diffs.empty() && !( aDiff == diffs.back() ) ) + { + // Small equality inside a patch. 
+ patch.diffs.push_back( aDiff ); + patch.length1 += aDiff.text().length(); + patch.length2 += aDiff.text().length(); + } + + if ( aDiff.text().length() >= 2 * Patch_Margin ) + { + // Time for a new patch. + if ( !patch.diffs.empty() ) + { + patch_addContext( patch, prepatch_text ); + patches.emplace_back( patch ); + patch = Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + } + + // Update the current character count. + if ( !aDiff.isInsert() ) + { + char_count1 += aDiff.text().length(); + } + if ( !aDiff.isDelete() ) + { + char_count2 += aDiff.text().length(); + } + } + // Pick up the leftover patch if not empty. + if ( !patch.diffs.empty() ) + { + patch_addContext( patch, prepatch_text ); + patches.emplace_back( patch ); + } + + return patches; + } + + TPatchVector diff_match_patch::patch_make( const std::string &text1, const TDiffVector &diffs ) + { + return patch_make( NUtils::to_wstring( text1 ), diffs ); + } + + TPatchVector diff_match_patch::patch_make( const std::string &text1, const std::string &text2, const TDiffVector &diffs ) + { + return patch_make( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), diffs ); + } + + TPatchVector diff_match_patch::patch_make( const std::string &text1, const std::string &text2 ) + { + return patch_make( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + TPatchVector diff_match_patch::patch_deepCopy( const TPatchVector &patches ) + { + TPatchVector patchesCopy; + for ( auto &&aPatch : patches ) + { + Patch patchCopy = Patch(); + for ( auto &&aDiff : aPatch.diffs ) + { + patchCopy.diffs.push_back( aDiff ); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = 
aPatch.length2; + patchesCopy.emplace_back( patchCopy ); + } + return patchesCopy; + } + + std::pair< std::wstring, std::vector< bool > > diff_match_patch::patch_apply( TPatchVector patches, std::wstring text ) + { + if ( patches.empty() ) + { + return { text, std::vector< bool >( 0 ) }; + } + + // Deep copy the patches so that no changes are made to originals. + patches = patch_deepCopy( patches ); + + std::wstring nullPadding = patch_addPadding( patches ); + text = nullPadding + text + nullPadding; + patch_splitMax( patches ); + + std::size_t x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. + uint64_t delta = 0; + std::vector< bool > results( patches.size() ); + for ( auto &&aPatch : patches ) + { + auto expected_loc = aPatch.start2 + delta; + std::wstring text1 = diff_text1( aPatch.diffs ); + std::size_t start_loc; + std::size_t end_loc = std::string::npos; + if ( text1.length() > Match_MaxBits ) + { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. + start_loc = match_main( text, text1.substr( 0, Match_MaxBits ), expected_loc ); + if ( start_loc != -1 ) + { + end_loc = match_main( text, text1.substr( text1.length() - Match_MaxBits ), expected_loc + text1.length() - Match_MaxBits ); + if ( end_loc == -1 || start_loc >= end_loc ) + { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } + else + { + start_loc = match_main( text, text1, expected_loc ); + } + if ( start_loc == -1 ) + { + // No match found. :( + results[ x ] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } + else + { + // Found a match. 
:) + results[ x ] = true; + delta = start_loc - expected_loc; + std::wstring text2; + if ( end_loc == -1 ) + { + text2 = safeMid( text, start_loc, text1.length() ); + } + else + { + text2 = safeMid( text, start_loc, end_loc + Match_MaxBits - start_loc ); + } + if ( text1 == text2 ) + { + // Perfect match, just shove the replacement text in. + text = text.substr( 0, start_loc ) + diff_text2( aPatch.diffs ) + safeMid( text, start_loc + text1.length() ); + } + else + { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + TDiffVector diffs = diff_main( text1, text2, false ); + if ( text1.length() > Match_MaxBits && diff_levenshtein( diffs ) / static_cast< float >( text1.length() ) > Patch_DeleteThreshold ) + { + // The end points match, but the content is unacceptably bad. + results[ x ] = false; + } + else + { + diff_cleanupSemanticLossless( diffs ); + std::size_t index1 = 0; + for ( auto &&aDiff : aPatch.diffs ) + { + if ( !aDiff.isEqual() ) + { + auto index2 = diff_xIndex( diffs, index1 ); + if ( aDiff.isInsert() ) + { + // Insertion + text = text.substr( 0, start_loc + index2 ) + aDiff.text() + safeMid( text, start_loc + index2 ); + } + else if ( aDiff.isDelete() ) + { + // Deletion + text = text.substr( 0, start_loc + index2 ) + safeMid( text, start_loc + diff_xIndex( diffs, index1 + aDiff.text().length() ) ); + } + } + if ( !aDiff.isDelete() ) + { + index1 += aDiff.text().length(); + } + } + } + } + } + x++; + } + // Strip the padding off. 
+ text = safeMid( text, nullPadding.length(), text.length() - 2 * nullPadding.length() ); + return { text, results }; + } + + std::pair< std::wstring, std::vector< bool > > diff_match_patch::patch_apply( TPatchVector patches, std::string text ) + { + return patch_apply( patches, NUtils::to_wstring( text ) ); + } + + std::wstring diff_match_patch::patch_addPadding( TPatchVector &patches ) + { + auto paddingLength = Patch_Margin; + std::wstring nullPadding; + for ( char x = 1; x <= paddingLength; x++ ) + { + nullPadding += NUtils::to_wstring( x ); + } + + // Bump all the patches forward. + for ( auto &&aPatch : patches ) + { + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + // auto && patch = patches.front(); + // TDiffVector & diffs = patch.diffs; + if ( patches.front().diffs.empty() || !patches.front().diffs.front().isEqual() ) + { + // Add nullPadding equality. + patches.front().diffs.emplace( patches.front().diffs.begin(), EOperation::eEQUAL, nullPadding ); + patches.front().start1 -= paddingLength; // Should be 0. + patches.front().start2 -= paddingLength; // Should be 0. + patches.front().length1 += paddingLength; + patches.front().length2 += paddingLength; + } + else if ( paddingLength > patches.front().diffs.front().text().length() ) + { + // Grow first equality. + auto &&firstDiff = patches.front().diffs.front(); + auto extraLength = paddingLength - firstDiff.text().length(); + firstDiff.fText = nullPadding.substr( firstDiff.text().length() ) + firstDiff.text(); + patches.front().start1 -= extraLength; + patches.front().start2 -= extraLength; + patches.front().length1 += extraLength; + patches.front().length2 += extraLength; + } + + // Add some padding on end of last diff. + // patch = patches.back(); + // diffs = patch.diffs; + if ( ( patches.back().diffs.size() == 0 ) || !patches.back().diffs.back().isEqual() ) + { + // Add nullPadding equality. 
+ patches.back().diffs.emplace_back( EOperation::eEQUAL, nullPadding ); + patches.back().length1 += paddingLength; + patches.back().length2 += paddingLength; + } + else if ( paddingLength > patches.back().diffs.back().text().length() ) + { + // Grow last equality. + // Diff &lastDiff = patches.back().diffs.back(); + auto extraLength = paddingLength - patches.back().diffs.back().text().length(); + patches.back().diffs.back().fText += nullPadding.substr( 0, extraLength ); + patches.back().length1 += extraLength; + patches.back().length2 += extraLength; + } + + return nullPadding; + } + + void diff_match_patch::patch_splitMax( TPatchVector &patches ) + { + auto patch_size = Match_MaxBits; + for ( int x = 0; x < patches.size(); x++ ) + { + if ( patches[ x ].length1 <= patch_size ) + { + continue; + } + Patch bigpatch = patches[ x ]; + // Remove the big old patch. + NUtils::Splice( patches, x--, 1 ); + auto start1 = bigpatch.start1; + auto start2 = bigpatch.start2; + std::wstring precontext; + while ( !bigpatch.diffs.empty() ) + { + // Create one of several smaller patches. + Patch patch; + bool empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if ( precontext.length() != 0 ) + { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.emplace_back( EOperation::eEQUAL, precontext ); + } + while ( !bigpatch.diffs.empty() && ( patch.length1 < ( patch_size - Patch_Margin ) ) ) + { + auto diff_text = bigpatch.diffs[ 0 ].text(); + if ( bigpatch.diffs[ 0 ].isInsert() ) + { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.push_back( bigpatch.diffs.front() ); + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + empty = false; + } + else if ( ( bigpatch.diffs[ 0 ].isDelete() ) && ( patch.diffs.size() == 1 ) && ( patch.diffs.front().isEqual() ) && ( diff_text.length() > 2 * patch_size ) ) + { + // This is a large deletion. 
Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.emplace_back( bigpatch.diffs[ 0 ] ); + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + } + else + { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.substr( 0, std::min( diff_text.length(), ( patch_size > ( patch.length1 + Patch_Margin ) ) ? ( patch_size - patch.length1 - Patch_Margin ) : ( -1 * 1UL ) ) ); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if ( bigpatch.diffs[ 0 ].isEqual() ) + { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } + else + { + empty = false; + } + patch.diffs.emplace_back( bigpatch.diffs[ 0 ], diff_text ); + if ( diff_text == bigpatch.diffs[ 0 ].text() ) + { + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + } + else + { + bigpatch.diffs[ 0 ].fText = bigpatch.diffs[ 0 ].text().substr( diff_text.length() ); + } + } + } + // Compute the head context for the next patch. + precontext = diff_text2( patch.diffs ); + precontext = precontext.substr( std::max( kZERO, ( precontext.length() > Patch_Margin ) ? ( precontext.length() - Patch_Margin ) : 0 ) ); + + std::wstring postcontext; + // Append the end context for this patch. 
+ if ( diff_text1( bigpatch.diffs ).length() > Patch_Margin ) + { + postcontext = diff_text1( bigpatch.diffs ).substr( 0, Patch_Margin ); + } + else + { + postcontext = diff_text1( bigpatch.diffs ); + } + + if ( postcontext.length() != 0 ) + { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if ( ( patch.diffs.size() != 0 ) && ( patch.diffs[ patch.diffs.size() - 1 ].isEqual() ) ) + { + patch.diffs[ patch.diffs.size() - 1 ].fText += postcontext; + } + else + { + patch.diffs.emplace_back( EOperation::eEQUAL, postcontext ); + } + } + if ( !empty ) + { + NUtils::Splice( patches, ++x, 0ULL, patch ); + } + } + } + } + + std::wstring diff_match_patch::patch_toText( const TPatchVector &patches ) + { + std::wstring text; + for ( auto &&aPatch : patches ) + { + text += aPatch.toString(); + } + return text; + } + + TPatchVector diff_match_patch::patch_fromText( const std::wstring &textline ) + { + TPatchVector patches; + if ( textline.empty() ) + { + return patches; + } + auto text = NUtils::splitString( textline, L"\n", true ); + int textPointer = 0; + std::wstring line; + while ( textPointer < text.size() ) + { + patches.push_back( text[ textPointer ] ); + auto &patch = patches.back(); + textPointer++; + + while ( textPointer < text.size() ) + { + if ( text[ textPointer ].empty() ) + { + ++textPointer; + continue; + } + + auto sign = text[ textPointer ][ 0 ]; + + line = text[ textPointer ].substr( 1 ); + NUtils::replace( line, L"+", L"%2b" ); + line = NUtils::fromPercentEncoding( line ); + if ( sign == '-' ) + { + // Deletion. + patch.diffs.emplace_back( EOperation::eDELETE, line ); + } + else if ( sign == '+' ) + { + // Insertion. + patch.diffs.emplace_back( EOperation::eINSERT, line ); + } + else if ( sign == ' ' ) + { + // Minor equality. + patch.diffs.emplace_back( EOperation::eEQUAL, line ); + } + else if ( sign == '@' ) + { + // Start of next patch. + break; + } + else + { + // WTF? 
+ throw std::wstring( std::wstring( L"Invalid patch mode '" ) + sign + std::wstring( L" in: " ) + line ); + return {}; + } + textPointer++; + } + } + return patches; + } + + TPatchVector diff_match_patch::patch_fromText( const std::string &textline ) + { + return patch_fromText( NUtils::to_wstring( textline ) ); + } + + std::wstring diff_match_patch::safeMid( const std::wstring &str, std::size_t pos ) + { + return safeMid( str, pos, std::string::npos ); + } + + std::wstring diff_match_patch::safeMid( const std::wstring &str, std::size_t pos, std::size_t len ) + { + return ( pos == str.length() ) ? std::wstring() : str.substr( pos, len ); + } + + std::wstring NUtils::to_wstring( const diff_match_patch::TVariant &variant, bool doubleQuoteEmpty ) + { + std::wstring retVal; + if ( std::holds_alternative< std::wstring >( variant ) ) + retVal = std::get< std::wstring >( variant ); + + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + + return retVal; + } + + std::wstring NUtils::to_wstring( const Patch &patch, bool doubleQuoteEmpty ) + { + auto retVal = patch.toString(); + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + return retVal; + } + + std::wstring NUtils::to_wstring( const Diff &diff, bool doubleQuoteEmpty ) + { + auto retVal = diff.toString( EStringType::eDefault ); + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + return retVal; + } +} diff --git a/cpp17/diff_match_patch.h b/cpp17/diff_match_patch.h new file mode 100644 index 00000000..57c25cbb --- /dev/null +++ b/cpp17/diff_match_patch.h @@ -0,0 +1,721 @@ +/* +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DIFF_MATCH_PATCH_H +#define DIFF_MATCH_PATCH_H + +#include "diff_match_patch_utils.h" + +#include +#include +#include +#include +#include +#include +#include +#ifdef USE_GTEST + #include "gtest/gtest.h" +#endif + +/* + * Functions for diff, match and patch. + * Computes the difference between two texts to create a patch. + * Applies the patch onto another text, allowing for errors. + * + * @author fraser@google.com (Neil Fraser) + * + * Qt/C++ port by mikeslemmer@gmail.com (Mike Slemmer): + * Qt->C++17 with native STL use only, port by scott@towel42.com (Scott Aron + Bloom) + * C++17 was intentionally chosen for variant support + * + * Code known to compile with C++17 VS2022 and g++ 9.5.0 + * + * Here is a trivial sample program which works properly when linked with this + * library: + * + + #include "diff_match_patch.h" + int main(int argc, char **argv) { + auto str1 = std::string("First string in diff"); + auto str2 = std::string("Second string in diff"); + + diff_match_patch dmp; + auto strPatch = dmp.patch_toText(dmp.patch_make(str1, str2)); + auto out = dmp.patch_apply(dmp.patch_fromText(strPatch), str1); + auto strResult = out.first; + + // here, strResult will equal str2 above. + return 0; + } + +*/ + +namespace NDiffMatchPatch +{ + class diff_match_patch_test; + + /**- + * The data structure representing a diff is a Linked list of Diff objects: + * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), + * Diff(Operation.EQUAL, " world.")} + * which means: delete "Hello", add "Goodbye" and keep " world." 
// The three kinds of edit a Diff can describe.
enum class EOperation
{
    eDELETE,
    eINSERT,
    eEQUAL
};
// Text form of an operation (defined outside this header).
std::wstring toString( EOperation op );

// Selects the output flavour of Diff::toString(). The test harness prints
// diffs with eUnitTest and NUtils::to_wstring( Diff ) uses eDefault.
enum class EStringType
{
    eDefault,
    eUnitTest,
    ePatch
};
/**
 * Class representing one diff operation: an EOperation plus the text it
 * applies to.
 */
class Diff
{
public:
    /**
     * Default constructor. Leaves the operation at its in-class default
     * (eDELETE) and the text empty.
     */
    Diff() = default;

    /**
     * Constructor. Initializes the diff with the provided values.
     * @param _operation One of eINSERT, eDELETE or eEQUAL.
     * @param _text The text being applied; converted with NUtils::to_wstring.
     */
    template< typename STRING_TYPE >
    Diff( EOperation _operation, const STRING_TYPE &_text ) :
        fOperation( _operation ),
        fText( NUtils::to_wstring( _text ) )
    {
    }

    /**
     * Constructor. Takes the operation from an existing diff and the text
     * from a separate string.
     * @param rhs Diff whose operation is copied (its text is ignored).
     * @param _text The text for the new diff; converted with NUtils::to_wstring.
     */
    template< typename STRING_TYPE >
    Diff( const Diff &rhs, const STRING_TYPE &_text ) :
        fOperation( rhs.fOperation ),
        fText( NUtils::to_wstring( _text ) )
    {
    }

    // NOTE(review): declared inline but no definition is visible in this
    // header; an inline function needs its definition in every translation
    // unit that uses it -- confirm where it is defined.
    inline bool isNull() const;
    std::wstring toString( EStringType stringType ) const;
    std::wstring toHtml() const;
    std::wstring toConsole() const;
    std::wstring toDelta() const;

    // Convenience predicates on the operation.
    bool isDelete() const { return fOperation == EOperation::eDELETE; }
    bool isInsert() const { return fOperation == EOperation::eINSERT; }
    bool isEqual() const { return fOperation == EOperation::eEQUAL; }

    // Read-only access to the text associated with this diff operation.
    const std::wstring &text() const { return fText; }

    bool operator==( const Diff &d ) const;
    bool operator!=( const Diff &d ) const;

    // The operation performed by this diff.
    EOperation fOperation{ EOperation::eDELETE };
    // The text associated with this diff operation.
    std::wstring fText;
};
using TDiffVector = std::vector< Diff >;

/**
 * Class representing one patch operation.
 */
class Patch
{
public:
    TDiffVector diffs;
    std::size_t length1{ 0 };
    std::size_t length2{ 0 };
    std::size_t start1{ 0 };
    std::size_t start2{ 0 };

    /**
     * Constructor. Initializes with an empty list of diffs.
     */
    Patch();
    // Not explicit: patch_fromText relies on the implicit conversion when it
    // pushes a header line into a TPatchVector.
    Patch( std::wstring &text ); // modifies text, and removes the text used
    bool isNull() const;
    std::wstring toString() const;

private:
    std::wstring getPatchHeader() const;
    std::wstring getCoordinateString( std::size_t start, std::size_t length ) const;
};
using TPatchVector = std::vector< Patch >;

/**
 * Class containing the diff, match and patch methods.
 * Also contains the behaviour settings.
 *
 * Most operations are declared twice, once for std::wstring and once for
 * std::string. patch_fromText( std::string ) converts with
 * NUtils::to_wstring and forwards to the wide overload; presumably the other
 * narrow overloads do the same -- confirm in the implementation file.
 */

class diff_match_patch
{
    friend class NDiffMatchPatch::diff_match_patch_test;
#ifdef USE_GTEST
    FRIEND_TEST( diff_match_patch_test, testDiffCommonOverlap );
    FRIEND_TEST( diff_match_patch_test, testDiffHalfmatch );
    FRIEND_TEST( diff_match_patch_test, testDiffLinesToChars );
    FRIEND_TEST( diff_match_patch_test, testDiffCharsToLines );
    FRIEND_TEST( diff_match_patch_test, testDiffBisect );
    FRIEND_TEST( diff_match_patch_test, testMatchAlphabet );
    FRIEND_TEST( diff_match_patch_test, testMatchBitap );
    FRIEND_TEST( diff_match_patch_test, testPatchAddContext );
#endif
public:
    // Defaults.
    // Set these on your diff_match_patch instance to override the defaults.

    // Number of seconds to map a diff before giving up (0 for infinity).
    float Diff_Timeout{ 1.0f };
    // Cost of an empty edit operation in terms of edit characters.
    short Diff_EditCost{ 4 };
    // At what point is no match declared (0.0 = perfection, 1.0 = very loose).
    float Match_Threshold{ 0.5f };
    // How far to search for a match (0 = exact location, 1000+ = broad match).
    // A match this many characters away from the expected location will add
    // 1.0 to the score (0.0 is a perfect match).
    int64_t Match_Distance{ 1000 };
    // When deleting a large block of text (over ~64 characters), how close does
    // the contents have to match the expected contents. (0.0 = perfection,
    // 1.0 = very loose). Note that Match_Threshold controls how closely the
    // end points of a delete need to match.
    float Patch_DeleteThreshold{ 0.5f };
    // Chunk size for context length.
    short Patch_Margin{ 4 };

    short Match_MaxBits{ 32 }; // unit tests are based on 32 bits

private:
    // Define some regex patterns for matching boundaries.
    static std::wregex BLANKLINEEND;
    static std::wregex BLANKLINESTART;

public:
    using TStringVector = std::vector< std::wstring >;
    using TVariant = std::variant< std::wstring, TStringVector >;
    using TVariantVector = std::vector< TVariant >;
    using TCharPosMap = std::map< wchar_t, std::size_t >;

    diff_match_patch();

    // DIFF FUNCTIONS

    /**
     * Find the differences between two texts.
     * Run a faster slightly less optimal diff.
     * This method allows the 'checklines' of diff_main() to be optional.
     * Most of the time checklines is wanted, so default to true.
     * @param text1 Old string to be diffed.
     * @param text2 New string to be diffed.
     * @return Vector of Diff objects.
     */
    TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2 );
    TDiffVector diff_main( const std::string &text1, const std::string &text2 );

    /**
     * Find the differences between two texts.
     * @param text1 Old string to be diffed.
     * @param text2 New string to be diffed.
     * @param checklines Speedup flag.  If false, then don't run a
     *     line-level diff first to identify the changed areas.
     *     If true, then run a faster slightly less optimal diff.
     * @return Vector of Diff objects.
     */
    TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines );
    TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines );

    /**
     * Find the differences between two texts.  Simplifies the problem by
     * stripping any common prefix or suffix off the texts before diffing.
     * @param text1 Old string to be diffed.
     * @param text2 New string to be diffed.
     * @param checklines Speedup flag.  If false, then don't run a
     *     line-level diff first to identify the changed areas.
     *     If true, then run a faster slightly less optimal diff.
     * @param deadline Time when the diff should be complete by.  Used
     *     internally for recursive calls.  Users should set Diff_Timeout instead.
     * @return Vector of Diff objects.
     */
private:
    TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline );
    TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline );

    /**
     * Find the differences between two texts.  Assumes that the texts do not
     * have any common prefix or suffix.
     * @param text1 Old string to be diffed.
     * @param text2 New string to be diffed.
     * @param checklines Speedup flag.  If false, then don't run a
     *     line-level diff first to identify the changed areas.
     *     If true, then run a faster slightly less optimal diff.
     * @param deadline Time when the diff should be complete by.
     * @return Vector of Diff objects.
     */
private:
    TDiffVector diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline );
    TDiffVector diff_compute( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline );

    /**
     * Do a quick line-level diff on both strings, then rediff the parts for
     * greater accuracy.
     * This speedup can produce non-minimal diffs.
     * Note that both texts are taken by value.
     * @param text1 Old string to be diffed.
     * @param text2 New string to be diffed.
     * @param deadline Time when the diff should be complete by.
     * @return Vector of Diff objects.
     */
private:
    TDiffVector diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline );
    TDiffVector diff_lineMode( std::string text1, std::string text2, clock_t deadline );

    /**
     * Find the 'middle snake' of a diff, split the problem in two
     * and return the recursively constructed diff.
     * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
     * @param text1 Old string to be diffed.
     * @param text2 New string to be diffed.
     * @param deadline Time at which to bail if not yet complete.
     * @return Vector of Diff objects.
     */
protected:
    TDiffVector diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline );
    TDiffVector diff_bisect( const std::string &text1, const std::string &text2, clock_t deadline );

    /**
     * Given the location of the 'middle snake', split the diff in two parts
     * and recurse.
     * @param text1 Old string to be diffed.
     * @param text2 New string to be diffed.
     * @param x Index of split point in text1.
     * @param y Index of split point in text2.
     * @param deadline Time at which to bail if not yet complete.
     * @return Vector of Diff objects.
     */
private:
    TDiffVector diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline );
    TDiffVector diff_bisectSplit( const std::string &text1, const std::string &text2, std::size_t x, std::size_t y, clock_t deadline );

    /**
     * Split two texts into a list of strings.  Reduce the texts to a string of
     * hashes where each Unicode character represents one line.
     * @param text1 First string.
     * @param text2 Second string.
     * @return Three element vector, containing the encoded text1, the
     *     encoded text2 and the list of unique strings.  The zeroth element
     *     of the list of unique strings is intentionally blank.
     */
protected:
    std::vector< TVariant > diff_linesToChars( const std::wstring &text1,
                                               const std::wstring &text2 );   // return elems 0 and 1 are std::wstring,
                                                                              // elem 2 is TStringVector
    std::vector< TVariant > diff_linesToChars( const std::string &text1, const std::string &text2 );

    /**
     * Split a text into a list of strings.  Reduce the texts to a string of
     * hashes where each Unicode character represents one line.
     * @param text String to encode.
     * @param lineArray List of unique strings.
     * @param lineHash Map of strings to indices.
     * @return Encoded string.
     */
private:
    std::wstring diff_linesToCharsMunge( const std::wstring &text, TStringVector &lineArray, std::unordered_map< std::wstring, std::size_t > &lineHash );

    /**
     * Rehydrate the text in a diff from a string of line hashes to real lines of
     * text.
     * @param diffs Vector of Diff objects, modified in place.
     * @param lineArray List of unique strings.
     */
private:
    void diff_charsToLines( TDiffVector &diffs, const TStringVector &lineArray );

    /**
     * Determine the common prefix of two strings.
     * @param text1 First string.
     * @param text2 Second string.
     * @return The number of characters common to the start of each string.
     */
public:
    std::size_t diff_commonPrefix( const std::wstring &text1, const std::wstring &text2 );
    std::size_t diff_commonPrefix( const std::string &text1, const std::string &text2 );

    /**
     * Determine the common suffix of two strings.
     * @param text1 First string.
     * @param text2 Second string.
     * @return The number of characters common to the end of each string.
     */
public:
    std::size_t diff_commonSuffix( const std::wstring &text1, const std::wstring &text2 );
    std::size_t diff_commonSuffix( const std::string &text1, const std::string &text2 );

    /**
     * Determine if the suffix of one string is the prefix of another.
     * @param text1 First string.
     * @param text2 Second string.
     * @return The number of characters common to the end of the first
     *     string and the start of the second string.
     */
protected:
    std::size_t diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 );
    std::size_t diff_commonOverlap( const std::string &text1, const std::string &text2 );

    /**
     * Do the two texts share a substring which is at least half the length of
     * the longer text?
     * This speedup can produce non-minimal diffs.
     * @param text1 First string.
     * @param text2 Second string.
     * @return Five element string vector, containing the prefix of text1, the
     *     suffix of text1, the prefix of text2, the suffix of text2 and the
     *     common middle.  Empty if there was no match (the unit tests check
     *     the no-match cases with assertEmpty).
     */
protected:
    TStringVector diff_halfMatch( const std::wstring &text1, const std::wstring &text2 );
    TStringVector diff_halfMatch( const std::string &text1, const std::string &text2 );

    /**
     * Does a substring of shorttext exist within longtext such that the
     * substring is at least half the length of longtext?
     * @param longtext Longer string.
     * @param shorttext Shorter string.
     * @param i Start index of quarter length substring within longtext.
     * @return Five element string vector, containing the prefix of longtext, the
     *     suffix of longtext, the prefix of shorttext, the suffix of shorttext
     *     and the common middle.  Presumably empty if there was no match, as
     *     for diff_halfMatch -- confirm in the implementation.
     */
private:
    TStringVector diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i );
    TStringVector diff_halfMatchI( const std::string &longtext, const std::string &shorttext, std::size_t i );

    /**
     * Reduce the number of edits by eliminating semantically trivial equalities.
     * @param diffs Vector of Diff objects, modified in place.
     */
public:
    void diff_cleanupSemantic( TDiffVector &diffs );

    /**
     * Look for single edits surrounded on both sides by equalities
     * which can be shifted sideways to align the edit to a word boundary.
     * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
     * @param diffs Vector of Diff objects, modified in place.
     */
public:
    void diff_cleanupSemanticLossless( TDiffVector &diffs );

    /**
     * Given two strings, compute a score representing whether the internal
     * boundary falls on logical boundaries.
     * Scores range from 6 (best) to 0 (worst).
     * @param one First string.
     * @param two Second string.
     * @return The score.
     */
private:
    int64_t diff_cleanupSemanticScore( const std::wstring &one, const std::wstring &two );
    int64_t diff_cleanupSemanticScore( const std::string &one, const std::string &two );

    /**
     * Reduce the number of edits by eliminating operationally trivial equalities.
     * @param diffs Vector of Diff objects, modified in place.
     */
public:
    void diff_cleanupEfficiency( TDiffVector &diffs );

    /**
     * Reorder and merge like edit sections.  Merge equalities.
     * Any edit section can move as long as it doesn't cross an equality.
     * @param diffs Vector of Diff objects, modified in place.
     */
public:
    void diff_cleanupMerge( TDiffVector &diffs );

    /**
     * loc is a location in text1, compute and return the equivalent location in
     * text2.
     * e.g. "The cat" vs "The big cat", 1->1, 5->8
     * @param diffs Vector of Diff objects.
     * @param loc Location within text1.
     * @return Location within text2.
     */
public:
    std::size_t diff_xIndex( const TDiffVector &diffs, std::size_t loc );

    /**
     * Convert a Diff list into a pretty HTML report.
     * @param diffs Vector of Diff objects.
     * @return HTML representation.
     */
public:
    std::wstring diff_prettyHtml( const TDiffVector &diffs );

    /**
     * Convert a Diff list into a pretty Console report.  Red for delete, and green for insert
     * @param diffs Vector of Diff objects.
     * @return Console representation.
     */
public:
    std::wstring diff_prettyConsole( const TDiffVector &diffs );

    /**
     * Compute and return the source text (all equalities and deletions).
     * @param diffs Vector of Diff objects.
     * @return Source text.
     */
public:
    std::wstring diff_text1( const TDiffVector &diffs );

    /**
     * Compute and return the destination text (all equalities and insertions).
     * @param diffs Vector of Diff objects.
     * @return Destination text.
     */
public:
    std::wstring diff_text2( const TDiffVector &diffs );

    /**
     * Compute the Levenshtein distance; the number of inserted, deleted or
     * substituted characters.
     * @param diffs Vector of Diff objects.
     * @return Number of changes.
     */
public:
    std::size_t diff_levenshtein( const TDiffVector &diffs );

    /**
     * Crush the diff into an encoded string which describes the operations
     * required to transform text1 into text2.
     * E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
     * Operations are tab-separated.  Inserted text is escaped using %xx notation.
     * @param diffs Vector of Diff objects.
     * @return Delta text.
     */
public:
    std::wstring diff_toDelta( const TDiffVector &diffs );

    /**
     * Given the original text1, and an encoded string which describes the
     * operations required to transform text1 into text2, compute the full diff.
     * @param text1 Source string for the diff.
     * @param delta Delta text.
     * @return Vector of Diff objects.
     * @throws std::wstring If invalid input.
     */
public:
    TDiffVector diff_fromDelta( const std::wstring &text1, const std::wstring &delta );
    TDiffVector diff_fromDelta( const std::string &text1, const std::string &delta );

    // MATCH FUNCTIONS

    /**
     * Locate the best instance of 'pattern' in 'text' near 'loc'.
     * Returns -1 if no match found.
     * NOTE(review): the return type is the unsigned std::size_t, so "-1" can
     * only be reported as a converted value (presumably
     * static_cast< std::size_t >( -1 )) -- confirm in the implementation.
     * @param text The text to search.
     * @param pattern The pattern to search for.
     * @param loc The location to search around.
     * @return Best match index or -1.
     */
public:
    std::size_t match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc );
    std::size_t match_main( const std::string &text, const std::string &pattern, std::size_t loc );

    /**
     * Locate the best instance of 'pattern' in 'text' near 'loc' using the
     * Bitap algorithm.  Returns -1 if no match found (see the note on
     * match_main about the unsigned return type).
     * @param text The text to search.
     * @param pattern The pattern to search for.
     * @param loc The location to search around.
     * @return Best match index or -1.
     */
protected:
    std::size_t match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc );
    std::size_t match_bitap( const std::string &text, const std::string &pattern, std::size_t loc );

    /**
     * Compute and return the score for a match with e errors and x location.
     * @param e Number of errors in match.
     * @param x Location of match.
     * @param loc Expected location of match.
     * @param pattern Pattern being sought.
     * @return Overall score for match (0.0 = good, 1.0 = bad).
     */
private:
    double match_bitapScore( int64_t e, int64_t x, int64_t loc, const std::wstring &pattern );

    /**
     * Initialise the alphabet for the Bitap algorithm.
     * @param pattern The text to encode.
     * @return Map from character to its location value.
     */
protected:
    TCharPosMap match_alphabet( const std::wstring &pattern );
    TCharPosMap match_alphabet( const std::string &pattern );

    // PATCH FUNCTIONS

    /**
     * Increase the context until it is unique,
     * but don't let the pattern expand beyond Match_MaxBits.
     * @param patch The patch to grow.
     * @param text Source text.
     */
protected:
    void patch_addContext( Patch &patch, const std::wstring &text );
    void patch_addContext( Patch &patch, const std::string &text );

    /**
     * Compute a list of patches to turn text1 into text2.
     * A set of diffs will be computed.
     * @param text1 Old text.
     * @param text2 New text.
     * @return Vector of Patch objects.
     */
public:
    TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2 );
    TPatchVector patch_make( const std::string &text1, const std::string &text2 );

    /**
     * Compute a list of patches to turn text1 into text2.
     * text1 will be derived from the provided diffs.
     * @param diffs Vector of Diff objects for text1 to text2.
     * @return Vector of Patch objects.
     */
public:
    TPatchVector patch_make( const TDiffVector &diffs );

    /**
     * Compute a list of patches to turn text1 into text2.
     * text2 is ignored, diffs are the delta between text1 and text2.
     * @param text1 Old text.
     * @param text2 Ignored.
     * @param diffs Vector of Diff objects for text1 to text2.
     * @return Vector of Patch objects.
     * @deprecated Prefer patch_make(const std::wstring &text1,
     *     const TDiffVector &diffs).
     */
public:
    TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2, const TDiffVector &diffs );
    TPatchVector patch_make( const std::string &text1, const std::string &text2, const TDiffVector &diffs );

    /**
     * Compute a list of patches to turn text1 into text2.
     * text2 is not provided, diffs are the delta between text1 and text2.
     * @param text1 Old text.
     * @param diffs Vector of Diff objects for text1 to text2.
     * @return Vector of Patch objects.
     */
public:
    TPatchVector patch_make( const std::wstring &text1, const TDiffVector &diffs );
    TPatchVector patch_make( const std::string &text1, const TDiffVector &diffs );

    /**
     * Given an array of patches, return another array that is identical.
     * @param patches Vector of Patch objects.
     * @return Vector of Patch objects.
     */
public:
    TPatchVector patch_deepCopy( const TPatchVector &patches );

    /**
     * Merge a set of patches onto the text.  Return a patched text, as well
     * as an array of true/false values indicating which patches were applied.
     * Both arguments are taken by value.
     * @param patches Vector of Patch objects.
     * @param text Old text.
     * @return Pair containing the new text and a vector of boolean values.
     */
public:
    std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::wstring text );
    std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::string text );

    /**
     * Add some padding on text start and end so that edges can match something.
     * Intended to be called only from within patch_apply.
     * @param patches Vector of Patch objects, modified in place.
     * @return The padding string added to each side.
     */
public:
    std::wstring patch_addPadding( TPatchVector &patches );

    /**
     * Look through the patches and break up any which are longer than the
     * maximum limit of the match algorithm.
     * Intended to be called only from within patch_apply.
     * @param patches Vector of Patch objects, modified in place.
     */
public:
    void patch_splitMax( TPatchVector &patches );

    /**
     * Take a list of patches and return a textual representation.
     * @param patches Vector of Patch objects.
     * @return Text representation of patches.
     */
public:
    std::wstring patch_toText( const TPatchVector &patches );

    /**
     * Parse a textual representation of patches and return a vector of Patch
     * objects.
     * @param textline Text representation of patches.
     * @return Vector of Patch objects.
     * @throws std::wstring If invalid input.
     */
public:
    TPatchVector patch_fromText( const std::wstring &textline );
    TPatchVector patch_fromText( const std::string &textline );

    /**
     * Substring helper kept from the Qt port (QString::mid(pos)).  Returns an
     * empty string when the position equals the string length, otherwise the
     * substring from pos to the end.
     * @param str String to take a substring from.
     * @param pos Position to start the substring from.
     * @return Substring.
     */
private:
    static std::wstring safeMid( const std::wstring &str, std::size_t pos );

    /**
     * Substring helper kept from the Qt port (QString::mid(pos, len)).  Returns
     * an empty string when the position equals the string length, otherwise
     * the substring of at most len characters starting at pos.
     * @param str String to take a substring from.
     * @param pos Position to start the substring from.
     * @param len Length of substring.
     * @return Substring.
     */
private:
    static std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len );
};

// Text conversions for the library's own types.  In each overload an empty
// result is rendered as two double-quote characters when doubleQuoteEmpty is
// true.
namespace NUtils
{
    std::wstring to_wstring( const diff_match_patch::TVariant &variant, bool doubleQuoteEmpty );
    std::wstring to_wstring( const Diff &diff, bool doubleQuoteEmpty );
    std::wstring to_wstring( const Patch &patch, bool doubleQuoteEmpty );
}   // namespace NUtils
+ */ + +#include "diff_match_patch.h" + +#include "diff_match_patch_test.h" +#include "diff_match_patch_utils.h" + +#ifdef USE_GTEST + #include "gtest/gtest.h" +#endif + +#include +#include +#include +#include + +int main( int argc, char **argv ) +{ +#ifdef USE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + int retVal = RUN_ALL_TESTS(); +#else + diff_match_patch_test dmp_test; + std::cerr << "Starting diff_match_patch unit tests.\n"; + int retVal = dmp_test.run_all_tests(); + std::cerr << "Done.\n"; +#endif + return retVal; +} + +static wchar_t kZero{ 0 }; +static wchar_t kOne{ 1 }; +static wchar_t kTwo{ 2 }; +namespace NDiffMatchPatch +{ + void PrintTo( const Diff &value, ::std::ostream *os ) + { + std::wostringstream oss; + oss << value.toString( EStringType::eUnitTest ); + std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; + *os << utf8_conv.to_bytes( oss.str() ); + } + + diff_match_patch_test::diff_match_patch_test() + { + } + +#ifndef USE_GTEST + bool diff_match_patch_test::runTest( std::function< void() > test ) + { + bool retVal = false; + try + { + test(); + numPassedTests++; + retVal = true; + } + catch ( std::string msg ) + { + std::cerr << "Test failed: " << msg << "\n"; + numFailedTests++; + retVal = false; + } + return retVal; + } + + int diff_match_patch_test::run_all_tests() + { + auto startTime = std::chrono::high_resolution_clock::now(); + + runTest( std::bind( &diff_match_patch_test::testDiffCommonPrefix, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCommonSuffix, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCommonOverlap, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffHalfmatch, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffLinesToChars, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCharsToLines, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupMerge, this ) ); + runTest( std::bind( 
&diff_match_patch_test::testDiffCleanupSemanticLossless, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupSemantic, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupEfficiency, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffPrettyHtml, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffPrettyConsole, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffText, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffDelta, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffXIndex, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffLevenshtein, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffBisect, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffMain, this ) ); + + runTest( std::bind( &diff_match_patch_test::testMatchAlphabet, this ) ); + runTest( std::bind( &diff_match_patch_test::testMatchBitap, this ) ); + runTest( std::bind( &diff_match_patch_test::testMatchMain, this ) ); + + runTest( std::bind( &diff_match_patch_test::testPatchObj, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchFromText, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchToText, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchAddContext, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchMake, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchSplitMax, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchAddPadding, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchApply, this ) ); + if ( numFailedTests == 0 ) + std::cout << numPassedTests << " Tests Passed\n" << numFailedTests << " Tests Failed\n"; + else + std::cerr << numPassedTests << " Tests Passed\n" << numFailedTests << " Tests Failed\n"; + auto endTime = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast< std::chrono::milliseconds >( endTime - 
startTime ).count(); + std::wcout << "Total time: " << elapsed << " ms\n"; + return ( numFailedTests == 0 ) ? 0 : 1; + } +#endif + + // DIFF TEST FUNCTIONS + + TEST_F( diff_match_patch_test, testDiffCommonPrefix ) + { + // Detect any common prefix. + assertEquals( "diff_commonPrefix: nullptr case.", 0, dmp.diff_commonPrefix( "abc", "xyz" ) ); + + assertEquals( "diff_commonPrefix: Non-nullptr case.", 4, dmp.diff_commonPrefix( "1234abcdef", "1234xyz" ) ); + + assertEquals( "diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix( "1234", "1234xyz" ) ); + } + + TEST_F( diff_match_patch_test, testDiffCommonSuffix ) + { + // Detect any common suffix. + assertEquals( "diff_commonSuffix: nullptr case.", 0, dmp.diff_commonSuffix( "abc", "xyz" ) ); + + assertEquals( "diff_commonSuffix: Non-nullptr case.", 4, dmp.diff_commonSuffix( "abcdef1234", "xyz1234" ) ); + + assertEquals( "diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix( "1234", "xyz1234" ) ); + } + + TEST_F( diff_match_patch_test, testDiffCommonOverlap ) + { + // Detect any suffix/prefix overlap. + assertEquals( "diff_commonOverlap: nullptr case.", 0, dmp.diff_commonOverlap( "", "abcd" ) ); + + assertEquals( "diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap( "abc", "abcd" ) ); + + assertEquals( "diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap( "123456", "abcd" ) ); + + assertEquals( "diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap( "123456xxx", "xxxabcd" ) ); + + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + assertEquals( "diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap( L"fi", std::wstring( L"\ufb01i" ) ) ); + } + + TEST_F( diff_match_patch_test, testDiffHalfmatch ) + { + // Detect a halfmatch. 
+ dmp.Diff_Timeout = 1; + assertEmpty( "diff_halfMatch: No match #1.", dmp.diff_halfMatch( "1234567890", "abcdef" ) ); + + assertEmpty( "diff_halfMatch: No match #2.", dmp.diff_halfMatch( "12345", "23" ) ); + + assertEquals( "diff_halfMatch: Single Match #1.", TStringVector( { L"12", L"90", L"a", L"z", L"345678" } ), dmp.diff_halfMatch( "1234567890", "a345678z" ) ); + + assertEquals( "diff_halfMatch: Single Match #2.", TStringVector( { L"a", L"z", L"12", L"90", L"345678" } ), dmp.diff_halfMatch( "a345678z", "1234567890" ) ); + + assertEquals( "diff_halfMatch: Single Match #3.", TStringVector( { L"abc", L"z", L"1234", L"0", L"56789" } ), dmp.diff_halfMatch( "abc56789z", "1234567890" ) ); + + assertEquals( "diff_halfMatch: Single Match #4.", TStringVector( { L"a", L"xyz", L"1", L"7890", L"23456" } ), dmp.diff_halfMatch( "a23456xyz", "1234567890" ) ); + + assertEquals( "diff_halfMatch: Multiple Matches #1.", TStringVector( { L"12123", L"123121", L"a", L"z", L"1234123451234" } ), dmp.diff_halfMatch( "121231234123451234123121", "a1234123451234z" ) ); + + assertEquals( "diff_halfMatch: Multiple Matches #2.", TStringVector( { L"", L"-=-=-=-=-=", L"x", L"", L"x-=-=-=-=-=-=-=" } ), dmp.diff_halfMatch( "x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=" ) ); + + assertEquals( "diff_halfMatch: Multiple Matches #3.", TStringVector( { L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y" } ), dmp.diff_halfMatch( "-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy" ) ); + + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not + // -qHillo+x=HelloHe-w+Hulloy + assertEquals( "diff_halfMatch: Non-optimal halfmatch.", TStringVector( { L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe" } ), dmp.diff_halfMatch( "qHilloHelloHew", "xHelloHeHulloy" ) ); + + dmp.Diff_Timeout = 0; + assertEmpty( "diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch( L"qHilloHelloHew", L"xHelloHeHulloy" ) ); + } + + TEST_F( diff_match_patch_test, testDiffLinesToChars ) + { + // Convert lines down to characters. 
+ TStringVector tmpVector = TStringVector( { L"", L"alpha\n", L"beta\n" } ); + TVariantVector tmpVarList; + tmpVarList.emplace_back( NUtils::to_wstring( { 1, 2, 1 }, false ) ); //(("\u0001\u0002\u0001")); + tmpVarList.emplace_back( NUtils::to_wstring( { 2, 1, 2 }, false ) ); // (("\u0002\u0001\u0002")); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n" ) ); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.emplace_back( L"" ); + tmpVector.emplace_back( L"alpha\r\n" ); + tmpVector.emplace_back( L"beta\r\n" ); + tmpVector.emplace_back( L"\r\n" ); + tmpVarList.emplace_back( std::wstring() ); + tmpVarList.emplace_back( NUtils::to_wstring( { 1, 2, 3, 3 }, false ) ); // (("\u0001\u0002\u0003\u0003")); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "", "alpha\r\nbeta\r\n\r\n\r\n" ) ); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.emplace_back( L"" ); + tmpVector.emplace_back( L"a" ); + tmpVector.emplace_back( L"b" ); + tmpVarList.emplace_back( NUtils::to_wstring( kOne ) ); // (("\u0001")); + tmpVarList.emplace_back( NUtils::to_wstring( kTwo ) ); // (("\u0002")); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "a", "b" ) ); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + tmpVarList.clear(); + std::wstring lines; + std::wstring chars; + for ( int x = 1; x < n + 1; x++ ) + { + tmpVector.emplace_back( std::to_wstring( x ) + L"\n" ); + lines += std::to_wstring( x ) + L"\n"; + chars += NUtils::to_wstring( (wchar_t)x ); + } + assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); + assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); + tmpVector.emplace( tmpVector.begin(), L"" ); + tmpVarList.emplace_back( chars ); + tmpVarList.emplace_back( std::wstring() ); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars( lines, {} ) ); + } + + TEST_F( diff_match_patch_test, testDiffCharsToLines ) + { + // First check that Diff equality works. + assertTrue( "diff_charsToLines:", Diff( EOperation::eEQUAL, "a" ) == Diff( EOperation::eEQUAL, "a" ) ); + + assertEquals( "diff_charsToLines:", Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eEQUAL, "a" ) ); + + // Convert chars up to lines. + TDiffVector diffs; + diffs.emplace_back( EOperation::eEQUAL, NUtils::to_wstring( { 1, 2, 1 }, false ) ); // ("\u0001\u0002\u0001"); + diffs.emplace_back( EOperation::eINSERT, NUtils::to_wstring( { 2, 1, 2 }, false ) ); // ("\u0002\u0001\u0002"); + TStringVector tmpVector; + tmpVector.emplace_back( L"" ); + tmpVector.emplace_back( L"alpha\n" ); + tmpVector.emplace_back( L"beta\n" ); + dmp.diff_charsToLines( diffs, tmpVector ); + assertEquals( "diff_charsToLines:", TDiffVector( { Diff( EOperation::eEQUAL, "alpha\nbeta\nalpha\n" ), Diff( EOperation::eINSERT, "beta\nalpha\nbeta\n" ) } ), diffs ); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + std::vector< TVariant > tmpVarList; + std::wstring lines; + std::wstring chars; + for ( int x = 1; x < n + 1; x++ ) + { + tmpVector.emplace_back( std::to_wstring( x ) + L"\n" ); + lines += std::to_wstring( x ) + L"\n"; + chars += NUtils::to_wstring( (wchar_t)x ); + } + assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); + assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); + tmpVector.emplace( tmpVector.begin(), L"" ); + diffs = { Diff( EOperation::eDELETE, chars ) }; + dmp.diff_charsToLines( diffs, tmpVector ); + assertEquals( "diff_charsToLines: More than 256.", TDiffVector( { Diff( EOperation::eDELETE, lines ) } ), diffs ); + } + + TEST_F( diff_match_patch_test, testDiffCleanupMerge ) + { + // Cleanup a messy diff. + TDiffVector diffs; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: nullptr case.", TDiffVector(), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eINSERT, "c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: No change case.", TDiffVector( { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eINSERT, "c" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eEQUAL, "b" ), Diff( EOperation::eEQUAL, "c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge equalities.", TDiffVector( { Diff( EOperation::eEQUAL, "abc" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eDELETE, "c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge deletions.", TDiffVector( { Diff( EOperation::eDELETE, "abc" ) } ), diffs ); + + diffs = { Diff( EOperation::eINSERT, "a" ), Diff( EOperation::eINSERT, "b" ), Diff( EOperation::eINSERT, "c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( 
"diff_cleanupMerge: Merge insertions.", TDiffVector( { Diff( EOperation::eINSERT, "abc" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, "b" ), Diff( EOperation::eDELETE, "c" ), Diff( EOperation::eINSERT, "d" ), Diff( EOperation::eEQUAL, "e" ), Diff( EOperation::eEQUAL, "f" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge interweave.", TDiffVector( { Diff( EOperation::eDELETE, "ac" ), Diff( EOperation::eINSERT, "bd" ), Diff( EOperation::eEQUAL, "ef" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, "abc" ), Diff( EOperation::eDELETE, "dc" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection.", TDiffVector( { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "d" ), Diff( EOperation::eINSERT, "b" ), Diff( EOperation::eEQUAL, "c" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "x" ), Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, "abc" ), Diff( EOperation::eDELETE, "dc" ), Diff( EOperation::eEQUAL, "y" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection with equalities.", TDiffVector( { Diff( EOperation::eEQUAL, "xa" ), Diff( EOperation::eDELETE, "d" ), Diff( EOperation::eINSERT, "b" ), Diff( EOperation::eEQUAL, "cy" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eINSERT, "ba" ), Diff( EOperation::eEQUAL, "c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left.", TDiffVector( { Diff( EOperation::eINSERT, "ab" ), Diff( EOperation::eEQUAL, "ac" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "c" ), Diff( EOperation::eINSERT, "ab" ), Diff( EOperation::eEQUAL, "a" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right.", TDiffVector( { Diff( EOperation::eEQUAL, "ca" ), Diff( EOperation::eINSERT, "ba" ) } ), diffs 
); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eEQUAL, "c" ), Diff( EOperation::eDELETE, "ac" ), Diff( EOperation::eEQUAL, "x" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left recursive.", TDiffVector( { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eEQUAL, "acx" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "x" ), Diff( EOperation::eDELETE, "ca" ), Diff( EOperation::eEQUAL, "c" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eEQUAL, "a" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right recursive.", TDiffVector( { Diff( EOperation::eEQUAL, "xca" ), Diff( EOperation::eDELETE, "cba" ) } ), diffs ); + } + + TEST_F( diff_match_patch_test, testDiffCleanupSemanticLossless ) + { + // Slide diffs to match logical boundaries. + auto diffs = TDiffVector(); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "AAA\r\n\r\nBBB" ), Diff( EOperation::eINSERT, "\r\nDDD\r\n\r\nBBB" ), Diff( EOperation::eEQUAL, "\r\nEEE" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemanticLossless: Blank lines.", TDiffVector( { Diff( EOperation::eEQUAL, "AAA\r\n\r\n" ), Diff( EOperation::eINSERT, "BBB\r\nDDD\r\n\r\n" ), Diff( EOperation::eEQUAL, "BBB\r\nEEE" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "AAA\r\nBBB" ), Diff( EOperation::eINSERT, " DDD\r\nBBB" ), Diff( EOperation::eEQUAL, " EEE" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemanticLossless: Line boundaries.", TDiffVector( { Diff( EOperation::eEQUAL, "AAA\r\n" ), Diff( EOperation::eINSERT, "BBB DDD\r\n" ), Diff( EOperation::eEQUAL, "BBB EEE" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "The c" ), Diff( EOperation::eINSERT, "ow and the c" ), Diff( EOperation::eEQUAL, "at." 
) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", TDiffVector( { Diff( EOperation::eEQUAL, "The " ), Diff( EOperation::eINSERT, "cow and the " ), Diff( EOperation::eEQUAL, "cat." ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "The-c" ), Diff( EOperation::eINSERT, "ow-and-the-c" ), Diff( EOperation::eEQUAL, "at." ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Alphanumeric boundaries.", TDiffVector( { Diff( EOperation::eEQUAL, "The-" ), Diff( EOperation::eINSERT, "cow-and-the-" ), Diff( EOperation::eEQUAL, "cat." ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eEQUAL, "ax" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the start.", TDiffVector( { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eEQUAL, "aax" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "xa" ), Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eEQUAL, "a" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the end.", TDiffVector( { Diff( EOperation::eEQUAL, "xaa" ), Diff( EOperation::eDELETE, "a" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "The xxx. The " ), Diff( EOperation::eINSERT, "zzz. The " ), Diff( EOperation::eEQUAL, "yyy." ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Sentence boundaries.", TDiffVector( { Diff( EOperation::eEQUAL, "The xxx." ), Diff( EOperation::eINSERT, " The zzz." ), Diff( EOperation::eEQUAL, " The yyy." ) } ), diffs ); + } + + TEST_F( diff_match_patch_test, testDiffCleanupSemantic ) + { + // Cleanup semantically trivial equalities. 
+ auto diffs = TDiffVector(); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs ); + + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "cd" ), Diff( EOperation::eEQUAL, "12" ), Diff( EOperation::eDELETE, "e" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #1.", TDiffVector( { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "cd" ), Diff( EOperation::eEQUAL, "12" ), Diff( EOperation::eDELETE, "e" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eINSERT, "ABC" ), Diff( EOperation::eEQUAL, "1234" ), Diff( EOperation::eDELETE, "wxyz" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #2.", TDiffVector( { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eINSERT, "ABC" ), Diff( EOperation::eEQUAL, "1234" ), Diff( EOperation::eDELETE, "wxyz" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eEQUAL, "b" ), Diff( EOperation::eDELETE, "c" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Simple elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eINSERT, "b" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eEQUAL, "cd" ), Diff( EOperation::eDELETE, "e" ), Diff( EOperation::eEQUAL, "f" ), Diff( EOperation::eINSERT, "g" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Backpass elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abcdef" ), Diff( EOperation::eINSERT, "cdfg" ) } ), diffs ); + + diffs = { Diff( EOperation::eINSERT, "1" ), Diff( EOperation::eEQUAL, "A" ), Diff( EOperation::eDELETE, "B" ), Diff( EOperation::eINSERT, "2" ), Diff( EOperation::eEQUAL, "_" ), Diff( EOperation::eINSERT, "1" ), Diff( EOperation::eEQUAL, "A" ), Diff( EOperation::eDELETE, "B" ), Diff( 
EOperation::eINSERT, "2" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Multiple elimination.", TDiffVector( { Diff( EOperation::eDELETE, "AB_AB" ), Diff( EOperation::eINSERT, "1A2_1A2" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "The c" ), Diff( EOperation::eDELETE, "ow and the c" ), Diff( EOperation::eEQUAL, "at." ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", TDiffVector( { Diff( EOperation::eEQUAL, "The " ), Diff( EOperation::eDELETE, "cow and the " ), Diff( EOperation::eEQUAL, "cat." ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "abcxx" ), Diff( EOperation::eINSERT, "xxdef" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No overlap elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abcxx" ), Diff( EOperation::eINSERT, "xxdef" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "abcxxx" ), Diff( EOperation::eINSERT, "xxxdef" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Overlap elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eEQUAL, "xxx" ), Diff( EOperation::eINSERT, "def" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "xxxabc" ), Diff( EOperation::eINSERT, "defxxx" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Reverse overlap elimination.", TDiffVector( { Diff( EOperation::eINSERT, "def" ), Diff( EOperation::eEQUAL, "xxx" ), Diff( EOperation::eDELETE, "abc" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "abcd1212" ), Diff( EOperation::eINSERT, "1212efghi" ), Diff( EOperation::eEQUAL, "----" ), Diff( EOperation::eDELETE, "A3" ), Diff( EOperation::eINSERT, "3BC" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Two overlap eliminations.", TDiffVector( { Diff( EOperation::eDELETE, "abcd" ), Diff( EOperation::eEQUAL, "1212" ), Diff( EOperation::eINSERT, 
"efghi" ), Diff( EOperation::eEQUAL, "----" ), Diff( EOperation::eDELETE, "A" ), Diff( EOperation::eEQUAL, "3" ), Diff( EOperation::eINSERT, "BC" ) } ), diffs ); + } + + TEST_F( diff_match_patch_test, testDiffCleanupEfficiency ) + { + // Cleanup operationally trivial equalities. + dmp.Diff_EditCost = 4; + auto diffs = TDiffVector(); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: nullptr case.", TDiffVector(), diffs ); + + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "wxyz" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: No elimination.", TDiffVector( { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "wxyz" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "34" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "xyz" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Four-edit elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abxyzcd" ), Diff( EOperation::eINSERT, "12xyz34" ) } ), diffs ); + + diffs = { Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "x" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Three-edit elimination.", TDiffVector( { Diff( EOperation::eDELETE, "xcd" ), Diff( EOperation::eINSERT, "12x34" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "xy" ), Diff( EOperation::eINSERT, "34" ), Diff( EOperation::eEQUAL, "z" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "56" ) }; + 
dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Backpass elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abxyzcd" ), Diff( EOperation::eINSERT, "12xy34z56" ) } ), diffs ); + + dmp.Diff_EditCost = 5; + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "wxyz" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: High cost elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abwxyzcd" ), Diff( EOperation::eINSERT, "12wxyz34" ) } ), diffs ); + dmp.Diff_EditCost = 4; + } + + TEST_F( diff_match_patch_test, testDiffPrettyHtml ) + { + // Pretty print. + auto diffs = TDiffVector( { Diff( EOperation::eEQUAL, "a\n" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eINSERT, "c&d" ) } ); + assertEquals( + "diff_prettyHtml:", + L"
<B>b</B>c&d", + dmp.diff_prettyHtml( diffs ) ); + } + + TEST_F( diff_match_patch_test, testDiffPrettyConsole ) + { + // Pretty print. + static std::wstring kRed{ L"\033[0;31m" }; + static std::wstring kGreen{ L"\033[0;32m" }; + static std::wstring kYellow{ L"\033[0;33m" }; + static std::wstring kReset{ L"\033[m" }; + static std::wstring kEOL{ NUtils::fromPercentEncoding( L"%C2%B6" ) + L"\n" }; + + auto diffs = TDiffVector( { Diff( EOperation::eEQUAL, "a\n" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eINSERT, "c&d" ) } ); + auto results = dmp.diff_prettyConsole( diffs ); + assertEquals( "diff_prettyConsole:", L"a" + kEOL + kRed + L"b" + kReset + kGreen + L"c&d" + kReset, results ); + } + + TEST_F( diff_match_patch_test, testDiffText ) + { + // Compute the source and destination texts. + auto diffs = { Diff( EOperation::eEQUAL, "jump" ), Diff( EOperation::eDELETE, "s" ), Diff( EOperation::eINSERT, "ed" ), Diff( EOperation::eEQUAL, " over " ), Diff( EOperation::eDELETE, "the" ), Diff( EOperation::eINSERT, "a" ), Diff( EOperation::eEQUAL, " lazy" ) }; + assertEquals( "diff_text1:", L"jumps over the lazy", dmp.diff_text1( diffs ) ); + assertEquals( "diff_text2:", L"jumped over a lazy", dmp.diff_text2( diffs ) ); + } + + TEST_F( diff_match_patch_test, testDiffDelta ) + { + // Convert a diff into delta string. 
+ auto diffs = TDiffVector( { Diff( EOperation::eEQUAL, "jump" ), Diff( EOperation::eDELETE, "s" ), Diff( EOperation::eINSERT, "ed" ), Diff( EOperation::eEQUAL, " over " ), Diff( EOperation::eDELETE, "the" ), Diff( EOperation::eINSERT, "a" ), Diff( EOperation::eEQUAL, " lazy" ), Diff( EOperation::eINSERT, "old dog" ) } ); + std::wstring text1 = dmp.diff_text1( diffs ); + assertEquals( "diff_text1: Base text.", L"jumps over the lazy", text1 ); + + std::wstring delta = dmp.diff_toDelta( diffs ); + std::wstring golden = L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog"; + assertEquals( "diff_toDelta:", L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta ); + + // Convert delta string into a diff. + assertEquals( "diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta( text1, delta ) ); + + // Generates error (19 < 20). + assertThrow( "diff_fromDelta: Too long.", dmp.diff_fromDelta( text1 + L"x", delta ), std::wstring ); + + // Generates error (19 > 18). + assertThrow( "diff_fromDelta: Too short.", dmp.diff_fromDelta( text1.substr( 1 ), delta ), std::wstring ); + + // Generates error (%c3%xy invalid Unicode). + assertThrow( "diff_fromDelta: Invalid character.", dmp.diff_fromDelta( "", "+%c3%xy" ), std::wstring ); + + // Test deltas with special characters. 
+ diffs = { Diff( EOperation::eEQUAL, std::wstring( L"\u0680 " ) + kZero + std::wstring( L" \t %" ) ), Diff( EOperation::eDELETE, std::wstring( L"\u0681 " ) + kOne + std::wstring( L" \n ^" ) ), Diff( EOperation::eINSERT, std::wstring( L"\u0682 " ) + kTwo + std::wstring( L" \\ |" ) ) }; + + text1 = dmp.diff_text1( diffs ); + golden = std::wstring( L"\u0680 " ) + kZero + std::wstring( L" \t %\u0681 " ) + kOne + std::wstring( L" \n ^" ); + assertEquals( "diff_text1: Unicode text", golden, text1 ); + + delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta: Unicode", L"=7\t-7\t+%DA%82 %02 %5C %7C", delta ); + + assertEquals( "diff_fromDelta: Unicode", diffs, dmp.diff_fromDelta( text1, delta ) ); + + // Verify pool of unchanged characters. + diffs = { Diff( EOperation::eINSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # " ) }; + std::wstring text2 = dmp.diff_text2( diffs ); + assertEquals( "diff_text2: Unchanged characters.", L"A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2 ); + + delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta: Unchanged characters.", L"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta ); + + // Convert delta string into a diff. + assertEquals( "diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta( {}, delta ) ); + } + + TEST_F( diff_match_patch_test, testDiffXIndex ) + { + // Translate a location in text1 to text2. 
+ auto diffs = TDiffVector( { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, "1234" ), Diff( EOperation::eEQUAL, "xyz" ) } ); + assertEquals( "diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex( diffs, 2 ) ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "1234" ), Diff( EOperation::eEQUAL, "xyz" ) }; + assertEquals( "diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex( diffs, 3 ) ); + } + + TEST_F( diff_match_patch_test, testDiffLevenshtein ) + { + auto diffs = TDiffVector( { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eINSERT, "1234" ), Diff( EOperation::eEQUAL, "xyz" ) } ); + assertEquals( "diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein( diffs ) ); + + diffs = { Diff( EOperation::eEQUAL, "xyz" ), Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eINSERT, "1234" ) }; + assertEquals( "diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein( diffs ) ); + + diffs = { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eEQUAL, "xyz" ), Diff( EOperation::eINSERT, "1234" ) }; + assertEquals( "diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein( diffs ) ); + } + + TEST_F( diff_match_patch_test, testDiffBisect ) + { + // Normal. + std::wstring a = L"cat"; + std::wstring b = L"map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + auto diffs = TDiffVector( { Diff( EOperation::eDELETE, "c" ), Diff( EOperation::eINSERT, "m" ), Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "t" ), Diff( EOperation::eINSERT, "p" ) } ); + auto results = dmp.diff_bisect( a, b, std::numeric_limits< clock_t >::max() ); + assertEquals( "diff_bisect: Normal.", diffs, results ); + + // Timeout. 
+ diffs = { Diff( EOperation::eDELETE, "cat" ), Diff( EOperation::eINSERT, "map" ) }; + results = dmp.diff_bisect( a, b, 0 ); + assertEquals( "diff_bisect: Timeout.", diffs, results ); + } + + TEST_F( diff_match_patch_test, testDiffMain ) + { + // Perform a trivial diff. + auto diffs = TDiffVector(); + assertEquals( "diff_main: nullptr case.", diffs, dmp.diff_main( "", "", false ) ); + + diffs = { Diff( EOperation::eDELETE, "abc" ) }; + assertEquals( "diff_main: RHS side nullptr case.", diffs, dmp.diff_main( "abc", "", false ) ); + + diffs = { Diff( EOperation::eINSERT, "abc" ) }; + assertEquals( "diff_main: LHS side nullptr case.", diffs, dmp.diff_main( "", "abc", false ) ); + + diffs = { Diff( EOperation::eEQUAL, "abc" ) }; + assertEquals( "diff_main: Equality.", diffs, dmp.diff_main( "abc", "abc", false ) ); + + diffs = { Diff( EOperation::eEQUAL, "ab" ), Diff( EOperation::eINSERT, "123" ), Diff( EOperation::eEQUAL, "c" ) }; + assertEquals( "diff_main: Simple insertion.", diffs, dmp.diff_main( "abc", "ab123c", false ) ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "123" ), Diff( EOperation::eEQUAL, "bc" ) }; + assertEquals( "diff_main: Simple deletion.", diffs, dmp.diff_main( "a123bc", "abc", false ) ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eINSERT, "123" ), Diff( EOperation::eEQUAL, "b" ), Diff( EOperation::eINSERT, "456" ), Diff( EOperation::eEQUAL, "c" ) }; + assertEquals( "diff_main: Two insertions.", diffs, dmp.diff_main( "abc", "a123b456c", false ) ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "123" ), Diff( EOperation::eEQUAL, "b" ), Diff( EOperation::eDELETE, "456" ), Diff( EOperation::eEQUAL, "c" ) }; + assertEquals( "diff_main: Two deletions.", diffs, dmp.diff_main( "a123b456c", "abc", false ) ); + + // Perform a real diff. + // Switch off the timeout. 
+ dmp.Diff_Timeout = 0; + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, "b" ) }; + assertEquals( "diff_main: Simple case #1.", diffs, dmp.diff_main( "a", "b", false ) ); + + diffs = { Diff( EOperation::eDELETE, "Apple" ), Diff( EOperation::eINSERT, "Banana" ), Diff( EOperation::eEQUAL, "s are a" ), Diff( EOperation::eINSERT, "lso" ), Diff( EOperation::eEQUAL, " fruit." ) }; + assertEquals( "diff_main: Simple case #2.", diffs, dmp.diff_main( "Apples are a fruit.", "Bananas are also fruit.", false ) ); + + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, L"\u0680" ), Diff( EOperation::eEQUAL, "x" ), Diff( EOperation::eDELETE, "\t" ), Diff( EOperation::eINSERT, NUtils::to_wstring( kZero ) ) }; + assertEquals( "diff_main: Simple case #3.", diffs, dmp.diff_main( L"ax\t", std::wstring( L"\u0680x" ) + kZero, false ) ); + + diffs = { Diff( EOperation::eDELETE, "1" ), Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "y" ), Diff( EOperation::eEQUAL, "b" ), Diff( EOperation::eDELETE, "2" ), Diff( EOperation::eINSERT, "xab" ) }; + assertEquals( "diff_main: Overlap #1.", diffs, dmp.diff_main( "1ayb2", "abxab", false ) ); + + diffs = { Diff( EOperation::eINSERT, "xaxcx" ), Diff( EOperation::eEQUAL, "abc" ), Diff( EOperation::eDELETE, "y" ) }; + assertEquals( "diff_main: Overlap #2.", diffs, dmp.diff_main( "abcy", "xaxcxabc", false ) ); + + diffs = { Diff( EOperation::eDELETE, "ABCD" ), Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "=" ), Diff( EOperation::eINSERT, "-" ), Diff( EOperation::eEQUAL, "bcd" ), Diff( EOperation::eDELETE, "=" ), Diff( EOperation::eINSERT, "-" ), Diff( EOperation::eEQUAL, "efghijklmnopqrs" ), Diff( EOperation::eDELETE, "EFGHIJKLMNOefg" ) }; + assertEquals( "diff_main: Overlap #3.", diffs, dmp.diff_main( "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false ) ); + + diffs = { Diff( EOperation::eINSERT, " " ), Diff( EOperation::eEQUAL, "a" ), Diff( 
EOperation::eINSERT, "nd" ), Diff( EOperation::eEQUAL, " [[Pennsylvania]]" ), Diff( EOperation::eDELETE, " and [[New" ) }; + assertEquals( "diff_main: Large equality.", diffs, dmp.diff_main( "a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false ) ); + + dmp.Diff_Timeout = 0.1f; // 100ms + // This test may 'fail' on extremely fast computers. If so, just increase the + // text lengths. + std::wstring a = L"`Twas brillig, and the slithy toves\nDid gyre and gimble in the " + L"wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + std::wstring b = L"I am the very model of a modern major general,\nI've information " + L"vegetable, animal, and mineral,\nI know the kings of England, and I " + L"quote the fights historical,\nFrom Marathon to Waterloo, in order " + L"categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for ( int x = 0; x < 10; x++ ) + { + a = a + a; + b = b + b; + } + clock_t startTime = clock(); + dmp.diff_main( a, b ); + clock_t endTime = clock(); + // Test that we took at least the timeout period. + assertTrue( "diff_main: Timeout min.", ( dmp.Diff_Timeout * CLOCKS_PER_SEC ) <= ( endTime - startTime ) ); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // Java seems to overrun by ~80% (compared with 10% for other languages). + // Therefore use an upper limit of 0.5s instead of 0.2s. + assertTrue( "diff_main: Timeout max.", ( dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 ) > ( endTime - startTime ) ); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890" + L"\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n123456789" + L"0\n1234567890\n"; + b = L"abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij" + L"\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghi" + L"j\nabcdefghij\n"; + assertEquals( "diff_main: Simple line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); + + a = L"12345678901234567890123456789012345678901234567890123456789012345678901" + L"23456789012345678901234567890123456789012345678901234567890"; + b = L"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghija" + L"bcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + assertEquals( "diff_main: Single line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); + + a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890" + L"\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n123456789" + L"0\n1234567890\n"; + b = L"abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890" + L"\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n123456789" + L"0\nabcdefghij\n"; + TStringVector texts_linemode = diff_rebuildtexts( dmp.diff_main( a, b, true ) ); + TStringVector texts_textmode = diff_rebuildtexts( dmp.diff_main( a, b, false ) ); + assertEquals( "diff_main: Overlap line-mode.", texts_textmode, texts_linemode ); + } + + // MATCH TEST FUNCTIONS + + TEST_F( diff_match_patch_test, testMatchAlphabet ) + { + // Initialise the bitmasks for Bitap. 
+ TCharPosMap bitmask; + bitmask[ 'a' ] = 4; + bitmask[ 'b' ] = 2; + bitmask[ 'c' ] = 1; + assertEquals( "match_alphabet: Unique.", bitmask, dmp.match_alphabet( "abc" ) ); + + bitmask = TCharPosMap(); + bitmask[ 'a' ] = 37; + bitmask[ 'b' ] = 18; + bitmask[ 'c' ] = 8; + assertEquals( "match_alphabet: Duplicates.", bitmask, dmp.match_alphabet( "abcaba" ) ); + } + + TEST_F( diff_match_patch_test, testMatchBitap ) + { + // Bitap algorithm. + dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + assertEquals( "match_bitap: Exact match #1.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 5 ) ); + + assertEquals( "match_bitap: Exact match #2.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 0 ) ); + + assertEquals( "match_bitap: Fuzzy match #1.", 4, dmp.match_bitap( "abcdefghijk", "efxhi", 0 ) ); + + assertEquals( "match_bitap: Fuzzy match #2.", 2, dmp.match_bitap( "abcdefghijk", "cdefxyhijk", 5 ) ); + + assertEquals( "match_bitap: Fuzzy match #3.", -1, dmp.match_bitap( "abcdefghijk", "bxy", 1 ) ); + + assertEquals( "match_bitap: Overflow.", 2, dmp.match_bitap( "123456789xx0", "3456789x0", 2 ) ); + + assertEquals( "match_bitap: Before start match.", 0, dmp.match_bitap( "abcdef", "xxabc", 4 ) ); + + assertEquals( "match_bitap: Beyond end match.", 3, dmp.match_bitap( "abcdef", "defyy", 4 ) ); + + assertEquals( "match_bitap: Oversized pattern.", 0, dmp.match_bitap( "abcdef", "xabcdefy", 0 ) ); + + dmp.Match_Threshold = 0.4f; + assertEquals( "match_bitap: Threshold #1.", 4, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); + + dmp.Match_Threshold = 0.3f; + assertEquals( "match_bitap: Threshold #2.", -1, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); + + dmp.Match_Threshold = 0.0f; + assertEquals( "match_bitap: Threshold #3.", 1, dmp.match_bitap( "abcdefghijk", "bcdef", 1 ) ); + + dmp.Match_Threshold = 0.5f; + assertEquals( "match_bitap: Multiple select #1.", 0, dmp.match_bitap( "abcdexyzabcde", "abccde", 3 ) ); + + assertEquals( "match_bitap: Multiple select #2.", 8, 
dmp.match_bitap( "abcdexyzabcde", "abccde", 5 ) ); + + dmp.Match_Distance = 10; // Strict location. + assertEquals( "match_bitap: Distance test #1.", -1, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); + + assertEquals( "match_bitap: Distance test #2.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1 ) ); + + dmp.Match_Distance = 1000; // Loose location. + assertEquals( "match_bitap: Distance test #3.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); + } + + TEST_F( diff_match_patch_test, testMatchMain ) + { + // Full match. + assertEquals( "match_main: Equality.", 0, dmp.match_main( "abcdef", "abcdef", 1000 ) ); + + assertEquals( "match_main: nullptr text.", -1, dmp.match_main( "", "abcdef", 1 ) ); + + assertEquals( "match_main: nullptr pattern.", 3, dmp.match_main( "abcdef", "", 3 ) ); + + assertEquals( "match_main: Exact match.", 3, dmp.match_main( "abcdef", "de", 3 ) ); + + dmp.Match_Threshold = 0.7f; + assertEquals( "match_main: Complex match.", 4, dmp.match_main( "I am the very model of a modern major general.", " that berry ", 5 ) ); + dmp.Match_Threshold = 0.5f; + } + + // PATCH TEST FUNCTIONS + + TEST_F( diff_match_patch_test, testPatchObj ) + { + // Patch Object. 
+ Patch p; + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = { Diff( EOperation::eEQUAL, "jump" ), Diff( EOperation::eDELETE, "s" ), Diff( EOperation::eINSERT, "ed" ), Diff( EOperation::eEQUAL, " over " ), Diff( EOperation::eDELETE, "the" ), Diff( EOperation::eINSERT, "a" ), Diff( EOperation::eEQUAL, "\nlaz" ) }; + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals( "patch: toString.", strp, p.toString() ); + } + + TEST_F( diff_match_patch_test, testPatchFromText ) + { + assertTrue( "patch_fromText: #0.", dmp.patch_fromText( "" ).empty() ); + + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals( "patch_fromText: #1.", strp, dmp.patch_fromText( strp )[ 0 ].toString() ); + + assertEquals( "patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText( "@@ -1 +1 @@\n-a\n+b\n" )[ 0 ].toString() ); + + assertEquals( "patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText( "@@ -1,3 +0,0 @@\n-abc\n" )[ 0 ].toString() ); + + assertEquals( "patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText( "@@ -0,0 +1,3 @@\n+abc\n" )[ 0 ].toString() ); + + // Generates error. 
+        assertThrow( "patch_fromText: #5.", dmp.patch_fromText( "Bad\nPatch\n" ), std::wstring );
+    }
+
+    TEST_F( diff_match_patch_test, testPatchToText )   // round trip: patch_toText( patch_fromText( s ) ) must give back s
+    {
+        std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n";
+        auto patches = dmp.patch_fromText( strp );
+        assertEquals( "patch_toText: Single", strp, dmp.patch_toText( patches ) );   // one-hunk patch text
+
+        strp = L"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n "
+               L"tes\n";
+        patches = dmp.patch_fromText( strp );
+        assertEquals( "patch_toText: Dual", strp, dmp.patch_toText( patches ) );   // two-hunk patch text (label was truncated to "Dua")
+    }
+
+    TEST_F( diff_match_patch_test, testPatchAddContext )
+    {
+        dmp.Patch_Margin = 4;
+        auto p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ];
+        dmp.patch_addContext( p, "The quick brown fox jumps over the lazy dog." );
+        assertEquals( "patch_addContext: Simple case.", L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString() );
+
+        p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ];
+        dmp.patch_addContext( p, "The quick brown fox jumps." );
+        assertEquals( "patch_addContext: Not enough trailing context.", L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString() );
+
+        p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ];
+        dmp.patch_addContext( p, "The quick brown fox jumps." );
+        assertEquals( "patch_addContext: Not enough leading context.", L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString() );
+
+        p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ];
+        dmp.patch_addContext( p, "The quick brown fox jumps. The quick brown fox crashes." );
+        assertEquals( "patch_addContext: Ambiguity.", L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", p.toString() ); + } + + TEST_F( diff_match_patch_test, testPatchMake ) + { + TPatchVector patches; + patches = dmp.patch_make( "", "" ); + assertEquals( "patch_make: nullptr case", L"", dmp.patch_toText( patches ) ); + + std::wstring text1 = L"The quick brown fox jumps over the lazy dog."; + std::wstring text2 = L"That quick brown fox jumped over a lazy dog."; + std::wstring expectedPatch = L"@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n " + L"jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to + // rolling context. + patches = dmp.patch_make( text2, text1 ); + assertEquals( "patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText( patches ) ); + + expectedPatch = L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n " + L"jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make( text1, text2 ); + assertEquals( "patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText( patches ) ); + + auto diffs = dmp.diff_main( text1, text2, false ); + patches = dmp.patch_make( diffs ); + assertEquals( "patch_make: Diff input", expectedPatch, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( text1, diffs ); + assertEquals( "patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( text1, text2, diffs ); + assertEquals( "patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?" ); + assertEquals( + "patch_toText: Character encoding.", + L"@@ -1,21 +1,21 " + L"@@\n-%601234567890-=%5B%5D%5C;',./" + L"\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", + dmp.patch_toText( patches ) ); + + diffs = { Diff( EOperation::eDELETE, "`1234567890-=[]\\;',./" ), Diff( EOperation::eINSERT, "~!@#$%^&*()_+{}|:\"<>?" 
) }; + assertEquals( + "patch_fromText: Character decoding.", diffs, + dmp.patch_fromText( "@@ -1,21 +1,21 " + "@@\n-%601234567890-=%5B%5D%5C;',./" + "\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" )[ 0 ] + .diffs ); + + text1 = {}; + for ( int x = 0; x < 100; x++ ) + { + text1 += L"abcdef"; + } + text2 = text1 + L"123"; + expectedPatch = L"@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make( text1, text2 ); + assertEquals( "patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText( patches ) ); + } + + TEST_F( diff_match_patch_test, testPatchSplitMax ) + { + dmp.Diff_Timeout = 0; + // Confirm Match_MaxBits is 32. + TPatchVector patches; + patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0" ); + dmp.patch_splitMax( patches ); + assertEquals( + "patch_splitMax: #1.", + L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n " + L"ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n " + L"uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n " + L"zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", + dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( + "abcdef123456789012345678901234567890123456789012345678901234567890123456" + "7890uvwxyz", + "abcdefuvwxyz" ); + std::wstring oldToText = dmp.patch_toText( patches ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #2.", oldToText, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "1234567890123456789012345678901234567890123456789012345678901234567890", "abc" ); + dmp.patch_splitMax( patches ); + auto golden = L"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ " + L"-29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ " + L"-57,14 +1,3 @@\n-78901234567890\n+abc\n"; + auto results = dmp.patch_toText( patches ); + assertEquals( "patch_splitMax: #3.", golden, results ); + + patches = dmp.patch_make( + "abcdefghij , h : 0 , t : 1 
abcdefghij , h : 0 , t : 1 abcdefghij , h : "
+            "0 , t : 1",
+            "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : "
+            "0 , t : 1" );
+        dmp.patch_splitMax( patches );
+        assertEquals(
+            "patch_splitMax: #4.",
+            L"@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ "
+            L"-29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n",
+            dmp.patch_toText( patches ) );
+    }
+
+    TEST_F( diff_match_patch_test, testPatchAddPadding )   // each case checks the patch text before and after patch_addPadding
+    {
+        TPatchVector patches;
+        patches = dmp.patch_make( "", "test" );
+        assertEquals( "patch_addPadding: Both edges full.", L"@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText( patches ) );   // pre-padding baseline (label was truncated to "ful")
+        dmp.patch_addPadding( patches );
+        assertEquals( "patch_addPadding: Both edges full.", L"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText( patches ) );
+
+        patches = dmp.patch_make( "XY", "XtestY" );
+        assertEquals( "patch_addPadding: Both edges partial.", L"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText( patches ) );
+        dmp.patch_addPadding( patches );
+        assertEquals( "patch_addPadding: Both edges partial.", L"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText( patches ) );
+
+        patches = dmp.patch_make( "XXXXYYYY", "XXXXtestYYYY" );
+        assertEquals( "patch_addPadding: Both edges none.", L"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) );
+        dmp.patch_addPadding( patches );
+        assertEquals( "patch_addPadding: Both edges none.", L"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) );
+    }
+
+    TEST_F( diff_match_patch_test, testPatchApply )
+    {
+        dmp.Match_Distance = 1000;
+        dmp.Match_Threshold = 0.5f;
+        dmp.Patch_DeleteThreshold = 0.5f;
+        TPatchVector patches;
+        patches = dmp.patch_make( "", "" );
+        auto results = dmp.patch_apply( patches, "Hello world." 
); + auto &&boolArray = results.second; + + std::wstring resultStr = results.first + L"\t" + std::to_wstring( boolArray.size() ); + assertEquals( "patch_apply: nullptr case.", L"Hello world.\t0", resultStr ); + + patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog." ); + assertEquals( + "patch_apply: Exact match.", + L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 " + L"@@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", + dmp.patch_toText( patches ) ); + + results = dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + + assertEquals( "patch_apply: Exact match.", L"That quick brown fox jumped over a lazy dog.(true, true)", resultStr ); + + results = dmp.patch_apply( patches, "The quick red rabbit jumps over the tired tiger." ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( "patch_apply: Partial match.", L"That quick red rabbit jumped over a tired tiger.(true, true)", resultStr ); + + results = dmp.patch_apply( patches, "I am the very model of a modern major general." 
); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( "patch_apply: Failed match.", L"I am the very model of a modern major general.(false, false)", resultStr ); + + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy" ); + results = dmp.patch_apply( + patches, "x123456789012345678901234567890-----++++++++++----" + "-123456789012345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( "patch_apply: Big delete, small change.", L"xabcy(true, true)", resultStr ); + + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy" ); + results = dmp.patch_apply( + patches, "x12345678901234567890---------------++++++++++----" + "-----------12345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( + "patch_apply: Big delete, large change 1.", + L"xabc12345678901234567890---------------++++++++++-------------" + L"--12345678901234567890y(false, true)", + resultStr ); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy" ); + results = dmp.patch_apply( + patches, "x12345678901234567890---------------++++++++++----" + "-----------12345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( "patch_apply: Big delete, large change 2.", L"xabcy(true, true)", resultStr ); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = dmp.patch_make( + "abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------" + "1234567YYYYYYYYYY890" ); + results = 
dmp.patch_apply( patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( + "patch_apply: Compensate for failed patch.", + L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------" + L"1234567YYYYYYYYYY890(false, true)", + resultStr ); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make( "", "test" ); + std::wstring patchStr = dmp.patch_toText( patches ); + dmp.patch_apply( patches, "" ); + assertEquals( "patch_apply: No side effects.", patchStr, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "Woof" ); + patchStr = dmp.patch_toText( patches ); + dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); + assertEquals( "patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "", "test" ); + results = dmp.patch_apply( patches, "" ); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); + assertEquals( "patch_apply: Edge exact match.", L"test\ttrue", resultStr ); + + patches = dmp.patch_make( "XY", "XtestY" ); + results = dmp.patch_apply( patches, "XY" ); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); + assertEquals( "patch_apply: Near edge exact match.", L"XtestY\ttrue", resultStr ); + + patches = dmp.patch_make( "y", "y123" ); + results = dmp.patch_apply( patches, "x" ); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); + assertEquals( "patch_apply: Edge partial match.", L"x123\ttrue", resultStr ); + } + + TEST_F( diff_match_patch_test, fromGitHubExamples ) + { + auto lhs = L"I am the very model of a modern Major-General, I've information " + L"vegetable, animal, and mineral, I know 
the kings of England, and I " + L"quote the fights historical, From Marathon to Waterloo, in order " + L"categorical."; + auto rhs = L"I am the very model of a cartoon individual, My animation's comical, " + L"unusual, and whimsical, I'm quite adept at funny gags, comedic theory " + L"I have read, From wicked puns and stupid jokes to anvils that drop on " + L"your head."; + auto diffs = dmp.diff_main( lhs, rhs ); + dmp.diff_cleanupSemantic( diffs ); + auto console = dmp.diff_prettyConsole( diffs ); + auto html = dmp.diff_prettyHtml( diffs ); + auto delta = dmp.diff_toDelta( diffs ); + + auto consoleGolden = L"I am the very model of a \x1B[0;31mmodern Major-General, I've " + L"information vegetable, animal, and mineral, I know the kings of " + L"England, and I quote the fights historical, From Marathon to Waterloo, " + L"in order categorical\x1B[m\x1B[0;32mcartoon individual, My animation's " + L"comical, unusual, and whimsical, I'm quite adept at funny gags, " + L"comedic theory I have read, From wicked puns and stupid jokes to " + L"anvils that drop on your head\x1B[m."; + assertEquals( "gitHubDemos", consoleGolden, console ); + + auto htmlGolden = LR"(I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categoricalcartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head.)"; + assertEquals( "gitHubDemos", htmlGolden, html ); + + auto deltaGolden = L"=25\t-182\t+cartoon individual, My animation's comical, unusual, and " + L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " + L"From wicked puns and stupid jokes to anvils that drop on your head\t=1"; + assertEquals( "gitHubDemos", deltaGolden, delta ); + + auto patches = dmp.patch_make( lhs, rhs ); + auto patch = 
dmp.patch_toText( patches ); + auto patchGolden = L"@@ -22,187 +22,198 @@\n f a \n-modern Major-General, I've information " + L"vegetable, animal, and mineral, I know the kings of England, and I " + L"quote the fights historical, From Marathon to Waterloo, in order " + L"categorical\n+cartoon individual, My animation's comical, unusual, and " + L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " + L"From wicked puns and stupid jokes to anvils that drop on your head\n " + L".\n"; + assertEquals( "gitHubDemos", patchGolden, patch ); + } +} diff --git a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h new file mode 100644 index 00000000..7114ecf4 --- /dev/null +++ b/cpp17/diff_match_patch_test.h @@ -0,0 +1,175 @@ +/* + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef DIFF_MATCH_PATCH_TEST_H
+#define DIFF_MATCH_PATCH_TEST_H
+
+#ifdef USE_GTEST   // GoogleTest build: the assert* names below are thin wrappers over EXPECT_* macros
+    #include "gtest/gtest.h"
+    #define assertEquals( msg, GOLDEN, COMPUTED ) EXPECT_EQ( GOLDEN, COMPUTED ) << msg
+    #define assertEmpty( msg, COMPUTED ) EXPECT_TRUE( COMPUTED.empty() ) << msg
+    #define assertTrue( msg, COMPUTED ) EXPECT_TRUE( COMPUTED ) << msg
+    #define assertFalse( msg, COMPUTED ) EXPECT_FALSE( COMPUTED ) << msg
+    #define PUBLIC_TESTING : public testing::Test
+    #define assertThrow( msg, STATEMENT, EXCEPTION_TYPE ) EXPECT_THROW( STATEMENT, EXCEPTION_TYPE ) << msg
+#else   // standalone build: TEST_F expands to an ordinary member-function definition of the class below
+    #include   // NOTE(review): header name is missing here (looks stripped); std::function is used below, so <functional> is presumably needed - confirm
+    #define PUBLIC_TESTING
+    #define TEST_F( className, funcName ) void diff_match_patch_test::funcName()
+#endif
+
+#include "diff_match_patch_utils.h"
+
+namespace NDiffMatchPatch
+{
+    class diff_match_patch_test PUBLIC_TESTING   // PUBLIC_TESTING adds the testing::Test base only when USE_GTEST is defined
+    {
+    public:
+        using TStringVector = NDiffMatchPatch::diff_match_patch::TStringVector;
+        using TCharPosMap = NDiffMatchPatch::diff_match_patch::TCharPosMap;
+        using TVariant = NDiffMatchPatch::diff_match_patch::TVariant;
+        using TVariantVector = NDiffMatchPatch::diff_match_patch::TVariantVector;
+
+        diff_match_patch_test();
+
+#ifndef USE_GTEST   // everything down to the matching #endif exists only in the standalone (non-GoogleTest) build
+    public:
+        int run_all_tests();
+
+        // DIFF TEST FUNCTIONS (bodies are supplied through the TEST_F macro defined above)
+        void testDiffCommonPrefix();
+        void testDiffCommonSuffix();
+        void testDiffCommonOverlap();
+        void testDiffHalfmatch();
+        void testDiffLinesToChars();
+        void testDiffCharsToLines();
+        void testDiffCleanupMerge();
+        void testDiffCleanupSemanticLossless();
+        void testDiffCleanupSemantic();
+        void testDiffCleanupEfficiency();
+        void testDiffPrettyHtml();
+        void testDiffPrettyConsole();
+        void testDiffText();
+        void testDiffDelta();
+        void testDiffXIndex();
+        void testDiffLevenshtein();
+        void testDiffBisect();
+        void testDiffMain();
+
+        // MATCH TEST FUNCTIONS
+        void testMatchAlphabet();
+        void testMatchBitap();
+        void testMatchMain();
+
+        // PATCH TEST FUNCTIONS
+        void testPatchObj();
+        void testPatchFromText();
+        void testPatchToText();
+        void testPatchAddContext();
+        void testPatchMake();
+        void testPatchSplitMax();
+        void testPatchAddPadding();
+        void testPatchApply();
+
+        void fromGitHubExamples();
+
+    private:
+        bool runTest( std::function< void() > test );
+        std::size_t numPassedTests{ 0 };
+        std::size_t numFailedTests{ 0 };
+
+        // Define equality: element-wise comparison for any type offering size() and operator[].
+        template< typename T >
+        void assertEquals( const std::string &strCase, const T &lhs, const T &rhs )
+        {
+            bool failed = ( lhs.size() != rhs.size() );   // different lengths can never be equal
+            if ( !failed )
+            {
+                for ( auto ii = 0ULL; !failed && ( ii < lhs.size() ); ++ii )   // stops at the first differing element
+                {
+                    auto &&t1 = lhs[ ii ];
+                    auto &&t2 = rhs[ ii ];
+                    failed = t1 != t2;
+                }
+            }
+
+            if ( failed )
+            {
+                // Build human readable description of both lists.
+                auto lhsString = NUtils::to_wstring( lhs, true );
+                auto rhsString = NUtils::to_wstring( rhs, true );
+                reportFailure( strCase, lhsString, rhsString );
+                return;   // a failed case must not also be reported as passed
+            }
+            reportPassed( strCase );
+        }
+
+        void assertEquals( const std::string &strCase, bool lhs, bool rhs );
+        void assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 );
+        void assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 );
+        void assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 );
+        void assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 );
+        void assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 );
+        void assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 );
+
+        void assertTrue( const std::string &strCase, bool value );
+        void assertFalse( const std::string &strCase, bool value );
+        void assertEmpty( const std::string &strCase, const TStringVector &list );
+
+        void reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual );
+        void reportPassed( const std::string &strCase );
+
+    #define assertThrow( msg, COMMAND, EXCEPTION_TYPE ) \
+        { \
+            bool exceptionTriggered = false; \
+            try \
+            { \
+                COMMAND; \
+                assertFalse( msg, true ); \
+            } \
+            catch ( const EXCEPTION_TYPE &ex ) \
+            { \
+                exceptionTriggered = true; \
+            } \
+            assertTrue( std::string( msg ) + std::string( " - Exception triggered" ), exceptionTriggered ); \
+        }
+
+#endif
+    public:
+        bool equals( const TVariant &var1, const TVariant &var2 );
+
+        template< typename T >
+        bool equals( const T &lhs, const T &rhs )   // same element-wise rule as the template assertEquals, but returns the verdict instead of reporting it
+        {
+            bool equal = ( lhs.size() == rhs.size() );
+            for ( auto ii = 0ULL; equal && ( ii < lhs.size() ); ++ii )
+            {
+                auto &&t1 = lhs[ ii ];
+                auto &&t2 = rhs[ ii ];
+                equal = t1 == t2;
+            }
+            return equal;
+        }
+        NDiffMatchPatch::diff_match_patch dmp;   // the object under test, used by every test body as `dmp`
+
+        // Construct the two texts which made up the diff originally.
+        TStringVector diff_rebuildtexts( const NDiffMatchPatch::TDiffVector &diffs );
+    };
+}
+
+#endif // DIFF_MATCH_PATCH_TEST_H
diff --git a/cpp17/diff_match_patch_test_assertEquals.cpp b/cpp17/diff_match_patch_test_assertEquals.cpp
new file mode 100644
index 00000000..853fa4c4
--- /dev/null
+++ b/cpp17/diff_match_patch_test_assertEquals.cpp
@@ -0,0 +1,158 @@
+/*
+ * Diff Match and Patch -- Test Harness
+ * Copyright 2018 The diff-match-patch Authors.
+ * https://github.com/google/diff-match-patch
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "diff_match_patch.h"
+#include "diff_match_patch_test.h"
+#include "diff_match_patch_utils.h"
+namespace NDiffMatchPatch
+{
+#ifndef USE_GTEST   // hand-rolled assert helpers for the standalone (non-GoogleTest) build
+    void diff_match_patch_test::reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual )
+    {
+        std::cout << "FAILED : " + strCase + "\n";
+        std::wcerr << " Expected: " << expected << "\n Actual: " << actual << "\n";
+        numFailedTests++;   // the only place the failure counter is bumped in this file
+        // throw strCase;
+    }
+
+    void diff_match_patch_test::reportPassed( const std::string &strCase )
+    {
+        std::cout << "PASSED: " + strCase + "\n";
+    }
+
+    void diff_match_patch_test::assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 )
+    {
+        if ( n1 != n2 )
+        {
+            return reportFailure( strCase, std::to_wstring( n1 ), std::to_wstring( n2 ) );   // return here: a failed case must not fall through and also print PASSED
+        }
+        reportPassed( strCase );
+    }
+
+    void diff_match_patch_test::assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 )
+    {
+        if ( s1 != s2 )
+        {
+            return reportFailure( strCase, s1, s2 );   // report exactly one outcome per case
+        }
+        reportPassed( strCase );
+    }
+
+    void diff_match_patch_test::assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 )
+    {
+        return assertEquals( strCase, NUtils::to_wstring( s1 ), NUtils::to_wstring( s2 ) );   // widen, then reuse the std::wstring overload
+    }
+
+    void diff_match_patch_test::assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 )
+    {
+        if ( d1 != d2 )
+        {
+            return reportFailure( strCase, d1.toString(), d2.toString() );   // report exactly one outcome per case
+        }
+        reportPassed( strCase );
+    }
+
+    void diff_match_patch_test::assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 )
+    {
+        if ( var1 != var2 )
+        {
+            return reportFailure( strCase, NUtils::to_wstring( var1 ), NUtils::to_wstring( var2 ) );   // report exactly one outcome per case
+        }
+        reportPassed( strCase );
+    }
+
+    void diff_match_patch_test::assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 )
+    {
+        for ( auto &&ii : m1 )   // pass 1: every (key, value) of m1 must be present in m2 with the same value
+        {
+            auto rhs = m2.find( ii.first );
+            if ( rhs == m2.end() || rhs->second != ii.second )   // previously only the key was checked, so differing bitmasks went unnoticed
+            {
+                return reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", ( rhs == m2.end() ) ? std::wstring() : std::to_wstring( rhs->second ) );
+            }
+        }
+
+        for ( auto &&ii : m2 )   // pass 2: m2 must not hold keys that m1 lacks (shared keys were value-checked above)
+        {
+            auto rhs = m1.find( ii.first );
+            if ( rhs == m1.end() )
+            {
+                return reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" );
+            }
+        }
+
+        reportPassed( strCase );
+    }
+
+    void diff_match_patch_test::assertEquals( const std::string &strCase, bool lhs, bool rhs )
+    {
+        if ( lhs != rhs )
+        {
+            return reportFailure( strCase, NUtils::to_wstring( lhs, false ), NUtils::to_wstring( rhs, false ) );   // report exactly one outcome per case
+        }
+        reportPassed( strCase );
+    }
+
+    void diff_match_patch_test::assertTrue( const std::string &strCase, bool value )
+    {
+        if ( !value )
+        {
+            return reportFailure( strCase, NUtils::to_wstring( true, false ), NUtils::to_wstring( false, false ) );   // expected true, got false
+        }
+        reportPassed( strCase );
+    }
+
+    void diff_match_patch_test::assertFalse( const std::string &strCase, bool value )
+    {
+        if ( value )
+        {
+            return reportFailure( strCase, NUtils::to_wstring( false, false ), NUtils::to_wstring( true, false ) );   // expected false, got true
+        }
+        reportPassed( strCase );
+    }
+
+    void diff_match_patch_test::assertEmpty( const std::string &strCase, const TStringVector &list )
+    {
+        if ( !list.empty() )
+        {
+            throw strCase;   // NOTE(review): unlike the other helpers this throws a std::string and never calls reportPassed - confirm the runner catches it
+        }
+    }
+#endif
+
+    // Construct the two texts which made up the diff originally.
+    diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs )
+    {
+        TStringVector text( 2, std::wstring() );   // text[ 0 ] = source side, text[ 1 ] = destination side
+        for ( auto &&myDiff : diffs )
+        {
+            if ( !myDiff.isInsert() )   // everything except insertions belongs to the first text
+            {
+                text[ 0 ] += myDiff.text();
+            }
+
+            if ( !myDiff.isDelete() )   // everything except deletions belongs to the second text
+            {
+                text[ 1 ] += myDiff.text();
+            }
+        }
+        return text;
+    }
+}
diff --git a/cpp17/diff_match_patch_test_utils.cpp b/cpp17/diff_match_patch_test_utils.cpp
new file mode 100644
index 00000000..14deb663
--- /dev/null
+++ b/cpp17/diff_match_patch_test_utils.cpp
@@ -0,0 +1,157 @@
+/*
+ * Diff Match and Patch -- Test Harness
+ * Copyright 2018 The diff-match-patch Authors.
+ * https://github.com/google/diff-match-patch
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include + +#include "diff_match_patch.h" +#include "diff_match_patch_test.h" +#include "diff_match_patch_utils.h" +namespace NDiffMatchPatch +{ +#ifndef USE_GTEST + void diff_match_patch_test::reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ) + { + std::cout << "FAILED : " + strCase + "\n"; + std::wcerr << " Expected: " << expected << "\n Actual: " << actual << "\n"; + numFailedTests++; + // throw strCase; + } + + void diff_match_patch_test::reportPassed( const std::string &strCase ) + { + std::cout << "PASSED: " + strCase + "\n"; + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ) + { + if ( n1 != n2 ) + { + reportFailure( strCase, std::to_wstring( n1 ), std::to_wstring( n2 ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ) + { + if ( s1 != s2 ) + { + reportFailure( strCase, s1, s2 ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ) + { + return assertEquals( strCase, NUtils::to_wstring( s1 ), NUtils::to_wstring( s2 ) ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ) + { + if ( d1 != d2 ) + { + reportFailure( strCase, d1.toString(), d2.toString() ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ) + { + if ( var1 != var2 ) + { + reportFailure( strCase, NUtils::to_wstring( var1 ), NUtils::to_wstring( var2 ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ) + { + for ( auto &&ii : m1 ) + { + auto rhs = m2.find( ii.first ); + if ( rhs == m2.end() ) + { + 
reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); + } + } + + for ( auto &&ii : m2 ) + { + auto rhs = m1.find( ii.first ); + if ( rhs == m1.end() ) + { + reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); + } + } + + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, bool lhs, bool rhs ) + { + if ( lhs != rhs ) + { + reportFailure( strCase, NUtils::to_wstring( lhs, false ), NUtils::to_wstring( rhs, false ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertTrue( const std::string &strCase, bool value ) + { + if ( !value ) + { + reportFailure( strCase, NUtils::to_wstring( true, false ), NUtils::to_wstring( false, false ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertFalse( const std::string &strCase, bool value ) + { + if ( value ) + { + reportFailure( strCase, NUtils::to_wstring( false, false ), NUtils::to_wstring( true, false ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEmpty( const std::string &strCase, const TStringVector &list ) + { + if ( !list.empty() ) + { + throw strCase; + } + } +#endif + + // Construct the two texts which made up the diff originally. + diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs ) + { + TStringVector text( 2, std::wstring() ); + for ( auto &&myDiff : diffs ) + { + if ( !myDiff.isInsert() ) + { + text[ 0 ] += myDiff.text(); + } + if ( !myDiff.isDelete() ) + { + text[ 1 ] += myDiff.text(); + } + } + return text; + } +} \ No newline at end of file diff --git a/cpp17/diff_match_patch_utils.cpp b/cpp17/diff_match_patch_utils.cpp new file mode 100644 index 00000000..324fc526 --- /dev/null +++ b/cpp17/diff_match_patch_utils.cpp @@ -0,0 +1,201 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. 
+ * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "diff_match_patch_utils.h" + +#include +#include + +namespace NDiffMatchPatch +{ + namespace NUtils + { + std::wstring safeMid( const std::wstring &str, std::size_t pos ) + { + return safeMid( str, pos, std::string::npos ); + } + + std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ) + { + return ( pos == str.length() ) ? std::wstring() : str.substr( pos, len ); + } + + void replace( std::wstring &inString, const std::wstring &from, const std::wstring &to ) + { + std::size_t pos = inString.find( from ); + while ( pos != std::wstring::npos ) + { + inString.replace( pos, from.length(), to ); + pos = inString.find( from, pos + to.length() ); + } + } + + wchar_t toHexUpper( wchar_t value ) + { + return L"0123456789ABCDEF"[ value & 0xF ]; + } + + std::wstring toPercentEncoding( wchar_t c, const std::wstring &exclude, const std::wstring &include ) + { + std::wstring retVal; + + if ( ( ( c >= 0x61 && c <= 0x7A ) // ALPHA + || ( c >= 0x41 && c <= 0x5A ) // ALPHA + || ( c >= 0x30 && c <= 0x39 ) // DIGIT + || c == 0x2D // - + || c == 0x2E // . 
+ || c == 0x5F // _ + || c == 0x7E // ~ + || ( exclude.find( c ) != std::string::npos ) ) + && ( include.find( c ) == std::string::npos ) ) + { + retVal = std::wstring( 1, c ); + } + else + { + retVal = L'%'; + retVal += toHexUpper( ( c & 0xf0 ) >> 4 ); + retVal += toHexUpper( c & 0xf ); + } + return retVal; + } + + std::wstring toPercentEncoding( const std::wstring &input, const std::wstring &exclude /*= std::wstring()*/, const std::wstring &include /*= std::wstring() */ ) + { + if ( input.empty() ) + return {}; + std::wstring retVal; + retVal.reserve( input.length() * 3 ); + + static_assert( sizeof( wchar_t ) <= 4, "wchar_t is greater that 32 bit" ); + + std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; + for ( auto &&c : input ) + { + auto currStr = std::wstring( 1, c ); + auto asBytes = utf8_conv.to_bytes( currStr ); + for ( auto &&ii : asBytes ) + { + if ( ii ) + retVal += toPercentEncoding( ii, exclude, include ); + } + } + return retVal; + } + + wchar_t getValue( wchar_t ch ) + { + if ( ch >= '0' && ch <= '9' ) + ch -= '0'; + else if ( ch >= 'a' && ch <= 'f' ) + ch = ch - 'a' + 10; + else if ( ch >= 'A' && ch <= 'F' ) + ch = ch - 'A' + 10; + else + throw std::wstring( L"Invalid Character %" ) + ch; + + return ch; + } + + std::wstring fromPercentEncoding( const std::wstring &input ) + { + if ( input.empty() ) + return {}; + std::string retVal; + retVal.reserve( input.length() ); + for ( auto ii = 0ULL; ii < input.length(); ++ii ) + { + auto c = input[ ii ]; + if ( c == L'%' && ( ii + 2 ) < input.length() ) + { + auto a = input[ ++ii ]; + auto b = input[ ++ii ]; + a = getValue( a ); + b = getValue( b ); + a = a << 4; + auto value = a | b; + retVal += std::string( 1, value ); + } + else if ( c == '+' ) + retVal += ' '; + else + { + retVal += c; + } + } + std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; + auto asBytes = utf8_conv.from_bytes( retVal ); + + return asBytes; + } + + bool endsWith( const std::wstring &string, const 
std::wstring &suffix ) + { + if ( suffix.length() > string.length() ) + return false; + + return string.compare( string.length() - suffix.length(), suffix.length(), suffix ) == 0; + } + + TStringVector splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ) + { + if ( separator.empty() ) + { + if ( !skipEmptyParts || !string.empty() ) + return { string }; + return {}; + } + + TStringVector strings; + auto prevPos = 0ULL; + auto startPos = string.find_first_of( separator ); + while ( startPos != std::string::npos ) + { + auto start = prevPos ? prevPos + 1 : prevPos; + auto len = prevPos ? ( startPos - prevPos - 1 ) : startPos; + auto curr = string.substr( start, len ); + prevPos = startPos; + if ( !skipEmptyParts || !curr.empty() ) + strings.emplace_back( curr ); + startPos = string.find_first_of( separator, prevPos + 1 ); + } + auto remainder = string.substr( prevPos ? prevPos + 1 : prevPos ); + if ( !skipEmptyParts || !remainder.empty() ) + strings.emplace_back( remainder ); + + return strings; + } + + int64_t toInt( const std::wstring &string ) + { + int64_t retVal = 0; + try + { + std::size_t lastPos{}; + retVal = std::stoul( string, &lastPos ); + if ( lastPos != string.length() ) + return 0; + } + catch ( ... ) + { + } + return retVal; + } + + } // namespace NUtils +} diff --git a/cpp17/diff_match_patch_utils.h b/cpp17/diff_match_patch_utils.h new file mode 100644 index 00000000..332eb1f1 --- /dev/null +++ b/cpp17/diff_match_patch_utils.h @@ -0,0 +1,350 @@ +/* +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DIFF_MATCH_PATCH_UTILS_H +#define DIFF_MATCH_PATCH_UTILS_H +// +#include +#include +#include +namespace NDiffMatchPatch +{ + namespace NUtils + { + using TStringVector = std::vector< std::wstring >; + + /* + * Utility functions to replace Qt built in methods + */ + + /** + * A safer version of std::wstring.mid(pos). This one returns "" instead of + * null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @return Substring. + */ + std::wstring safeMid( const std::wstring &str, std::size_t pos ); + + /** + * A safer version of std::wstring.mid(pos, len). This one returns "" instead + * of null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @param len Length of substring. + * @return Substring. + */ + std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ); + + /** + + * replaces QString::replace + * @param haystack String to replace all needles with to + * @param needle Substring to search for in the haystack + * @param to replacement string + * @return void. 
+ */ + void replace( std::wstring &haystack, const std::wstring &needle, const std::wstring &to ); + + /** + * replaces returns the html percent encoded character equivalent + * @param c the input Character to return the encoded string of + * @param exclude The list of chars that are NOT to be encoded + * @param include The list of chars that are to be encoded + * @return the encoded string + */ + std::wstring toPercentEncoding( wchar_t c, const std::wstring &exclude = std::wstring(), const std::wstring &include = std::wstring() ); + + /** + * return the html percent encoded string equivalent + * @param input the input String to return the encoded string of + * @param exclude The list of chars that are NOT to be encoded + * @param include The list of chars that are to be encoded + * @return the encoded string + */ + std::wstring toPercentEncoding( const std::wstring &input, const std::wstring &exclude = std::wstring(), const std::wstring &include = std::wstring() ); + + /** + * returns the string equivalent removing any percent encoding and replacing it + * with the correct character + * @param input the input String to return the encoded string of + * @return the decoded string + */ + std::wstring fromPercentEncoding( const std::wstring &input ); + + /** + * replaces returns integer value of the character, '0'-'9' = 0-9, 'A'-'F' = + * 10-15, 'a'-'f' = 10-15 + * @param input the value to return the integer value of + * @return the integer value of the character + */ + wchar_t getIntValue( wchar_t ch ); + + /** + * return the integer value of the string + * @param string the String to be converted to an integer + * @return the integer version, on an invalid input returns 0 + */ + int64_t toInt( const std::wstring &string ); + + /** + * return true if the string has the suffix + * @param string the String to check to see if it ends with suffix + * @param suffix the String to see if the input string ends with + * @return True if the string ends with suffix + */ + bool 
endsWith( const std::wstring &string, const std::wstring &suffix ); + + /** + * return a TStringVector of the string split by separator + * @param string the String to be split + * @param separator the String to search in the input string to split on + * @param if true, empty values will be removed + * @return the split string + */ + TStringVector splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ); + + /** + * splices the objects vector into the input vector + * @param input The input vector to splice out from + * @param start The position of the first item to remove from the input vector + * @param count How many values to remove from the input vector + * @param objects optional objects to insert where the previous objects were + * removed + * @return the character as a single character string + */ + template< typename T > + static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const std::vector< T > &objects = {} ) + { + auto deletedRange = std::vector< T >( { input.begin() + start, input.begin() + start + count } ); + input.erase( input.begin() + start, input.begin() + start + count ); + input.insert( input.begin() + start, objects.begin(), objects.end() ); + + return deletedRange; + } + + /** + * splices the objects vector into the input vector + * @param input The input vector to splice out from + * @param start The position of the first item to remove from the input vector + * @param count How many values to remove from the input vector + * @param object individual object to insert where the previous objects were + * removed + * @return the character as a single character string + */ + template< typename T1 > + using base_type = typename std::remove_reference< std::remove_cv_t< std::decay_t< T1 > > >; + + template< typename T1 > + using base_type_t = typename base_type< T1 >::type; + + template< typename STRING_TYPE > + using is_wstring = typename std::is_same< std::wstring, 
base_type_t< STRING_TYPE > >; + + template< typename STRING_TYPE > + using is_wcharstar = typename std::is_same< wchar_t *, base_type_t< STRING_TYPE > >; + + template< typename STRING_TYPE > + using is_string = typename std::is_same< std::string, base_type_t< STRING_TYPE > >; + + template< typename STRING_TYPE > + using is_charstar = typename std::is_same< char *, base_type_t< STRING_TYPE > >; + + template< typename T > + static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const T &object ) + { + return Splice( input, start, count, std::vector< T >( { object } ) ); + } + + template< typename T > + std::wstring to_wstring( const T & /*value*/, bool /*doubleQuoteEmpty*/ ) + { + assert( false ); + return {}; + } + + /** + * return the single character wide string for the given character + * @param value the char to be converted to an wstring + * @param doubleQuoteEmpty, if the return value would be empty, return "" + * @return the character as a single character string + */ + inline std::wstring to_wstring( const char &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && ( value == 0 ) ) + return LR"("")"; + + return std::wstring( 1, static_cast< wchar_t >( value ) ); + } + + template<> + inline std::wstring to_wstring( const bool &value, bool /*doubleQuoteOnEmpty*/ ) + { + std::wstring retVal = std::wstring( value ? L"true" : L"false" ); + return retVal; + } + + template<> + inline std::wstring to_wstring( const std::vector< bool >::reference &value, bool /*doubleQuoteOnEmpty*/ ) + { + std::wstring retVal = std::wstring( value ? L"true" : L"false" ); + return retVal; + } + + template<> + inline std::wstring to_wstring( const std::string &string, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && string.empty() ) + return LR"("")"; + + std::wstring wstring( string.size(), + L' ' ); // Overestimate number of code points. 
+ wstring.resize( std::mbstowcs( &wstring[ 0 ], string.c_str(), + string.size() ) ); // Shrink to fit. + return wstring; + } + + template<> + inline std::wstring to_wstring( const wchar_t &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && ( value == 0 ) ) + return LR"("")"; + + return std::wstring( 1, value ); + } + + template<> + inline std::wstring to_wstring( const int &value, bool doubleQuoteEmpty ) + { + return to_wstring( static_cast< wchar_t >( value ), doubleQuoteEmpty ); + } + + template<> + inline std::wstring to_wstring( const std::wstring &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && value.empty() ) + return LR"("")"; + + return value; + } + + template< typename T > + inline std::wstring to_wstring( const std::vector< T > &values, bool doubleQuoteEmpty ) + { + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : values ) + { + if ( !first ) + { + retVal += L", "; + } + retVal += to_wstring( curr, doubleQuoteEmpty ); + first = false; + } + retVal += L")"; + return retVal; + } + + template<> + inline std::wstring to_wstring( const std::vector< bool > &boolArray, bool doubleQuoteOnEmpty ) + { + if ( boolArray.empty() && doubleQuoteOnEmpty ) + return LR"("")"; + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : boolArray ) + { + if ( !first ) + retVal += L", "; + first = false; + retVal += to_wstring( curr, doubleQuoteOnEmpty ); + } + retVal += L")"; + return retVal; + } + + template< typename T > + inline typename std::enable_if_t< std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && ( values.size() == 0 ) ) + return LR"("")"; + + std::wstring retVal; + for ( auto &&curr : values ) + { + retVal += to_wstring( curr, false ); + } + return retVal; + } + + template< typename T > + inline typename std::enable_if_t< !std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > 
&values, bool doubleQuoteEmpty ) + { + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : values ) + { + if ( !first ) + { + retVal += L", "; + } + retVal += to_wstring( curr, doubleQuoteEmpty ); + first = false; + } + retVal += L")"; + return retVal; + } + + //template< typename T > + //std::wstring to_wstring( const T &value ) + //{ + // return to_wstring( value, false ); + //} + + template< typename STRING_TYPE > + std::wstring to_wstring( const STRING_TYPE &string ) + { + static_assert( + is_wstring< STRING_TYPE >::value // + || is_wcharstar< STRING_TYPE >::value // + || is_string< STRING_TYPE >::value // + || is_charstar< STRING_TYPE >::value // + || std::is_same_v< char, STRING_TYPE > // + || std::is_same_v< wchar_t, STRING_TYPE > // + ); + + if constexpr ( is_wstring< STRING_TYPE >::value ) + return string; + else if constexpr ( is_wcharstar< STRING_TYPE >::value ) + return std::wstring( string ? string : L"" ); + else if constexpr ( is_string< STRING_TYPE >::value ) + return to_wstring( string, false ); + else if constexpr ( is_charstar< STRING_TYPE >::value ) + return to_wstring( std::string( string ? 
string : "" ), false ); + else if constexpr ( std::is_same_v< char, STRING_TYPE > ) + return to_wstring( string, false ); + else if constexpr ( std::is_same_v< wchar_t, STRING_TYPE > ) + return to_wstring( string, false ); + } + + } +} + +#endif diff --git a/objectivec/DiffMatchPatchCFUtilities.h b/objectivec/DiffMatchPatchCFUtilities.h index a9c93a20..690b9fa9 100755 --- a/objectivec/DiffMatchPatchCFUtilities.h +++ b/objectivec/DiffMatchPatchCFUtilities.h @@ -22,27 +22,29 @@ #ifndef _DIFFMATCHPATCHCFUTILITIES_H #define _DIFFMATCHPATCHCFUTILITIES_H -CFStringRef diff_CFStringCreateFromUnichar(UniChar ch); -CFStringRef diff_CFStringCreateJavaSubstring(CFStringRef s, CFIndex begin, CFIndex end); +CFStringRef diff_CFStringCreateFromUnichar( UniChar ch ); +CFStringRef diff_CFStringCreateJavaSubstring( CFStringRef s, CFIndex begin, CFIndex end ); -CFIndex diff_commonPrefix(CFStringRef text1, CFStringRef text2); -CFIndex diff_commonSuffix(CFStringRef text1, CFStringRef text2); -CFIndex diff_commonOverlap(CFStringRef text1, CFStringRef text2); -CFArrayRef diff_halfMatchCreate(CFStringRef text1, CFStringRef text2, const float diffTimeout); -CFArrayRef diff_halfMatchICreate(CFStringRef longtext, CFStringRef shorttext, CFIndex i); +CFIndex diff_commonPrefix( CFStringRef text1, CFStringRef text2 ); +CFIndex diff_commonSuffix( CFStringRef text1, CFStringRef text2 ); +CFIndex diff_commonOverlap( CFStringRef text1, CFStringRef text2 ); +CFArrayRef diff_halfMatchCreate( CFStringRef text1, CFStringRef text2, const float diffTimeout ); +CFArrayRef diff_halfMatchICreate( CFStringRef longtext, CFStringRef shorttext, CFIndex i ); -CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef lineArray, CFMutableDictionaryRef lineHash, CFIndex maxLines); +CFStringRef diff_linesToCharsMungeCFStringCreate( CFStringRef text, CFMutableArrayRef lineArray, CFMutableDictionaryRef lineHash, CFIndex maxLines ); -CFIndex diff_cleanupSemanticScore(CFStringRef one, 
CFStringRef two); +CFIndex diff_cleanupSemanticScore( CFStringRef one, CFStringRef two ); -CF_INLINE void diff_CFStringPrepareUniCharBuffer(CFStringRef string, const UniChar **string_chars, UniChar **string_buffer, CFRange string_range) { - *string_chars = CFStringGetCharactersPtr(string); - if (*string_chars == NULL) { - // Fallback in case CFStringGetCharactersPtr() didn’t work. - *string_buffer = malloc(string_range.length * sizeof(UniChar)); - CFStringGetCharacters(string, string_range, *string_buffer); - *string_chars = *string_buffer; - } +CF_INLINE void diff_CFStringPrepareUniCharBuffer( CFStringRef string, const UniChar **string_chars, UniChar **string_buffer, CFRange string_range ) +{ + *string_chars = CFStringGetCharactersPtr( string ); + if ( *string_chars == NULL ) + { + // Fallback in case CFStringGetCharactersPtr() didn’t work. + *string_buffer = malloc( string_range.length * sizeof( UniChar ) ); + CFStringGetCharacters( string, string_range, *string_buffer ); + *string_chars = *string_buffer; + } } -#endif //ifndef _DIFFMATCHPATCHCFUTILITIES_H +#endif //ifndef _DIFFMATCHPATCHCFUTILITIES_H diff --git a/objectivec/MinMaxMacros.h b/objectivec/MinMaxMacros.h index 2765e0fa..28059ae3 100755 --- a/objectivec/MinMaxMacros.h +++ b/objectivec/MinMaxMacros.h @@ -19,22 +19,28 @@ * ObjC port: jan@geheimwerk.de (Jan Weiß) */ -#if !defined(MIN) - #define MIN(A,B) \ - ({__typeof__(A) a = (A); \ - __typeof__(B) b = (B); \ - (a < b) ? a : b; }) +#if !defined( MIN ) + #define MIN( A, B ) \ + ( { \ + __typeof__( A ) a = ( A ); \ + __typeof__( B ) b = ( B ); \ + ( a < b ) ? a : b; \ + } ) #endif -#if !defined(MAX) - #define MAX(A,B) \ - ({__typeof__(A) a = (A); \ - __typeof__(B) b = (B); \ - (a > b) ? a : b; }) +#if !defined( MAX ) + #define MAX( A, B ) \ + ( { \ + __typeof__( A ) a = ( A ); \ + __typeof__( B ) b = ( B ); \ + ( a > b ) ? a : b; \ + } ) #endif -#if !defined(ABS) - #define ABS(A) \ - ({__typeof__(A) a = (A); \ - (a > 0) ? 
a : -a; }) +#if !defined( ABS ) + #define ABS( A ) \ + ( { \ + __typeof__( A ) a = ( A ); \ + ( a > 0 ) ? a : -a; \ + } ) #endif