Skip to content

Commit 3e5e7f8

Browse files
committed
[TEXT-235] Add Damerau-Levenshtein distance #687
- Update changes.xml
- Use final
- Add Javadoc
- Sort members
- Reduce vertical whitespace
- Remove extra parentheses
1 parent 79f036a commit 3e5e7f8

File tree

3 files changed

+125
-130
lines changed

3 files changed

+125
-130
lines changed

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ The <action> type attribute can be add,update,fix,remove.
5151
<action type="fix" dev="ggregory" due-to="Pierre Post, Sumit Bera, Alex Herbert, Gary Gregory" issue="TEXT-236">Inserting at end of a TextStringBuilder throws a StringIndexOutOfBoundsException.</action>
5252
<!-- ADD -->
5353
<action type="add" dev="ggregory" due-to="Piotr P. Karwasz, Gary Gregory">Add experimental CycloneDX VEX file #683.</action>
54+
<action type="add" dev="ggregory" due-to="LorgeN, Gary Gregory" issue="TEXT-235">Add Damerau-Levenshtein distance #687.</action>
5455
<!-- UPDATE -->
5556
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.apache.commons:commons-parent from 85 to 90 #704.</action>
5657
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons.bytebuddy.version from 1.17.6 to 1.17.8 #696.</action>

src/main/java/org/apache/commons/text/similarity/DamerauLevenshteinDistance.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ private static <E> int limitedCompare(SimilarityInput<E> left, SimilarityInput<E
115115
minCost = Integer.MAX_VALUE;
116116

117117
for (rightIndex = 1; rightIndex <= rightLength; rightIndex++) {
118-
cost = (left.at(leftIndex - 1) == right.at(rightIndex - 1)) ? 0 : 1;
118+
cost = left.at(leftIndex - 1) == right.at(rightIndex - 1) ? 0 : 1;
119119

120120
// Select cheapest operation
121121
curr[rightIndex] = Math.min(
@@ -220,7 +220,7 @@ private static <E> int unlimitedCompare(SimilarityInput<E> left, SimilarityInput
220220
curr[0] = leftIndex;
221221

222222
for (rightIndex = 1; rightIndex <= rightLength; rightIndex++) {
223-
cost = (left.at(leftIndex - 1) == right.at(rightIndex - 1)) ? 0 : 1;
223+
cost = left.at(leftIndex - 1) == right.at(rightIndex - 1) ? 0 : 1;
224224

225225
// Select cheapest operation
226226
curr[rightIndex] = Math.min(

src/test/java/org/apache/commons/text/similarity/DamerauLevenshteinDistanceTest.java

Lines changed: 122 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
*/
1717
package org.apache.commons.text.similarity;
1818

19-
import static org.junit.jupiter.api.Assertions.assertNull;
2019
import static org.junit.jupiter.api.Assertions.assertEquals;
20+
import static org.junit.jupiter.api.Assertions.assertNull;
2121
import static org.junit.jupiter.api.Assertions.assertThrows;
2222

2323
import java.util.Arrays;
@@ -29,6 +29,9 @@
2929
import org.junit.jupiter.params.provider.Arguments;
3030
import org.junit.jupiter.params.provider.MethodSource;
3131

32+
/**
33+
* Tests {@link DamerauLevenshteinDistance}.
34+
*/
3235
public class DamerauLevenshteinDistanceTest {
3336

3437
private static DamerauLevenshteinDistance defaultInstance;
@@ -38,95 +41,50 @@ static void createInstance() {
3841
defaultInstance = new DamerauLevenshteinDistance();
3942
}
4043

41-
@Test
42-
void testGetThresholdDirectlyAfterObjectInstantiation() {
43-
assertNull(defaultInstance.getThreshold());
44-
}
45-
46-
@Test
47-
void testGetThresholdIsCorrect() {
48-
DamerauLevenshteinDistance distance = new DamerauLevenshteinDistance(10);
49-
50-
assertEquals(10, distance.getThreshold());
51-
}
52-
53-
@Test
54-
void testNullInputsThrowUnlimited() {
55-
assertThrows(IllegalArgumentException.class, () -> defaultInstance.apply(null, "test"));
56-
assertThrows(IllegalArgumentException.class, () -> defaultInstance.apply("test", null));
57-
assertThrows(IllegalArgumentException.class, () -> defaultInstance.apply(null, SimilarityInput.input("test")));
58-
assertThrows(IllegalArgumentException.class, () -> defaultInstance.apply(SimilarityInput.input("test"), null));
59-
}
60-
61-
@Test
62-
void testNullInputsThrowLimited() {
63-
DamerauLevenshteinDistance instance = new DamerauLevenshteinDistance(10);
64-
65-
assertThrows(IllegalArgumentException.class, () -> instance.apply(null, "test"));
66-
assertThrows(IllegalArgumentException.class, () -> instance.apply("test", null));
67-
assertThrows(IllegalArgumentException.class, () -> instance.apply(null, SimilarityInput.input("test")));
68-
assertThrows(IllegalArgumentException.class, () -> instance.apply(SimilarityInput.input("test"), null));
69-
}
70-
71-
@Test
72-
void testInvalidThresholdThrows() {
73-
assertThrows(IllegalArgumentException.class, () -> new DamerauLevenshteinDistance(-1));
74-
}
75-
76-
@ParameterizedTest(name = "DamerauLevenshteinDistance.unlimitedCompare(\"{0}\", \"{1}\") should return {2}")
77-
@MethodSource("unlimitedDamerauLevenshteinDistanceTestCases")
78-
void testCalculateDamerauLevenshteinDistance(String left, String right, int expectedDistance) {
79-
int leftRightDistance = defaultInstance.apply(left, right);
80-
int rightLeftDistance = defaultInstance.apply(right, left);
81-
82-
assertEquals(expectedDistance, leftRightDistance);
83-
assertEquals(expectedDistance, rightLeftDistance);
84-
}
85-
86-
@ParameterizedTest(name = "DamerauLevenshteinDistance.unlimitedCompare(\"{0}\", \"{1}\") should return {2} ({3})")
87-
@MethodSource("unlimitedDamerauLevenshteinDistanceTestCases_SimilarityInput")
88-
void testCalculateDamerauLevenshteinDistance_SimilarityInput(String left, String right, int expectedDistance, final Class<?> cls) {
89-
SimilarityInput<Object> leftInput = SimilarityInputTest.build(cls, left);
90-
SimilarityInput<Object> rightInput = SimilarityInputTest.build(cls, right);
91-
92-
int leftRightDistance = defaultInstance.apply(leftInput, rightInput);
93-
int rightLeftDistance = defaultInstance.apply(rightInput, leftInput);
94-
95-
assertEquals(expectedDistance, leftRightDistance);
96-
assertEquals(expectedDistance, rightLeftDistance);
97-
}
98-
99-
@ParameterizedTest(name = "DamerauLevenshteinDistance.limitedCompare(\"{0}\", \"{1}\") should return {2}")
100-
@MethodSource("limitedDamerauLevenshteinDistanceTestCases")
101-
void testCalculateDamerauLevenshteinDistance(String left, String right, int threshold, int expectedDistance) {
102-
DamerauLevenshteinDistance instance = new DamerauLevenshteinDistance(threshold);
103-
104-
int leftRightDistance = instance.apply(left, right);
105-
int rightLeftDistance = instance.apply(right, left);
106-
107-
assertEquals(expectedDistance, leftRightDistance);
108-
assertEquals(expectedDistance, rightLeftDistance);
109-
}
110-
111-
@ParameterizedTest(name = "DamerauLevenshteinDistance.limitedCompare(\"{0}\", \"{1}\") should return {2}")
112-
@MethodSource("limitedDamerauLevenshteinDistanceTestCases_SimilarityInput")
113-
void testCalculateDamerauLevenshteinDistance_SimilarityInput(String left, String right, int threshold, int expectedDistance, final Class<?> cls) {
114-
DamerauLevenshteinDistance instance = new DamerauLevenshteinDistance(threshold);
115-
116-
SimilarityInput<Object> leftInput = SimilarityInputTest.build(cls, left);
117-
SimilarityInput<Object> rightInput = SimilarityInputTest.build(cls, right);
118-
119-
int leftRightDistance = instance.apply(leftInput, rightInput);
120-
int rightLeftDistance = instance.apply(rightInput, leftInput);
121-
122-
assertEquals(expectedDistance, leftRightDistance);
123-
assertEquals(expectedDistance, rightLeftDistance);
44+
static Stream<Arguments> limitedDamerauLevenshteinDistanceTestCases() {
45+
return Stream.of(
46+
Arguments.of("", "test", 10, 4),
47+
Arguments.of("test", "", 10, 4),
48+
Arguments.of("", "test", 2, -1),
49+
Arguments.of("test", "", 2, -1),
50+
Arguments.of("testing long string", "testing", 2, -1),
51+
Arguments.of("kitten", "sitting", 1, -1),
52+
Arguments.of("saturday", "sunday", 3, 3),
53+
Arguments.of("hello", "world", 6, 4),
54+
Arguments.of("algorithm", "logarithm", 1, -1),
55+
Arguments.of("computer", "comptuer", 1, 1),
56+
Arguments.of("receive", "recieve", 3, 1),
57+
Arguments.of("programming", "porgramming", 0, -1),
58+
Arguments.of("test", "tset", 1, 1),
59+
Arguments.of("example", "exmaple", 3, 1),
60+
Arguments.of("transform", "transfrom", 0, -1),
61+
Arguments.of("information", "infromation", 1, 1),
62+
Arguments.of("development", "developemnt", 3, 1),
63+
Arguments.of("password", "passwrod", 0, -1),
64+
Arguments.of("separate", "seperate", 1, 1),
65+
Arguments.of("definitely", "definately", 3, 1),
66+
Arguments.of("occurrence", "occurence", 0, -1),
67+
Arguments.of("necessary", "neccessary", 1, 1),
68+
Arguments.of("restaurant", "restaraunt", 4, 2),
69+
Arguments.of("beginning", "begining", 0, -1),
70+
Arguments.of("government", "goverment", 1, 1),
71+
Arguments.of("abcdefghijklmnop", "ponmlkjihgfedcba", 17, 15),
72+
Arguments.of("AAAAAAAAAA", "BBBBBBBBBB", 5, -1),
73+
Arguments.of("abababababab", "babababababa", 2, 2),
74+
Arguments.of("supercalifragilisticexpialidocious", "supercalifragilisticexpialidocous", 3, 1),
75+
Arguments.of("pneumonoultramicroscopicsilicovolcanoconiosiss", "pneumonoultramicroscopicsilicovolcanoconiosis", 0, -1),
76+
Arguments.of("abcdefg", "gfedcba", 6, 6),
77+
Arguments.of("xyxyxyxyxy", "yxyxyxyxyx", 4, 2),
78+
Arguments.of("aaaaabbbbbccccc", "cccccbbbbbaaaaa", 5, -1),
79+
Arguments.of("thequickbrownfoxjumpsoverthelazydog", "thequickbrownfoxjumpsovrethelazydog", 1, 1),
80+
Arguments.of("antidisestablishmentarianism", "antidisestablishmentarianisn", 3, 1)
81+
);
12482
}
12583

126-
static Stream<Arguments> unlimitedDamerauLevenshteinDistanceTestCases_SimilarityInput() {
84+
static Stream<Arguments> limitedDamerauLevenshteinDistanceTestCases_SimilarityInput() {
12785
return SimilarityInputTest.similarityInputs()
128-
.flatMap(cls -> unlimitedDamerauLevenshteinDistanceTestCases().map(arguments -> {
129-
Object[] values = Arrays.copyOf(arguments.get(), arguments.get().length + 1);
86+
.flatMap(cls -> limitedDamerauLevenshteinDistanceTestCases().map(arguments -> {
87+
final Object[] values = Arrays.copyOf(arguments.get(), arguments.get().length + 1);
13088
values[values.length - 1] = cls;
13189
return Arguments.of(values);
13290
}));
@@ -169,52 +127,88 @@ static Stream<Arguments> unlimitedDamerauLevenshteinDistanceTestCases() {
169127
);
170128
}
171129

172-
static Stream<Arguments> limitedDamerauLevenshteinDistanceTestCases_SimilarityInput() {
130+
static Stream<Arguments> unlimitedDamerauLevenshteinDistanceTestCases_SimilarityInput() {
173131
return SimilarityInputTest.similarityInputs()
174-
.flatMap(cls -> limitedDamerauLevenshteinDistanceTestCases().map(arguments -> {
175-
Object[] values = Arrays.copyOf(arguments.get(), arguments.get().length + 1);
132+
.flatMap(cls -> unlimitedDamerauLevenshteinDistanceTestCases().map(arguments -> {
133+
final Object[] values = Arrays.copyOf(arguments.get(), arguments.get().length + 1);
176134
values[values.length - 1] = cls;
177135
return Arguments.of(values);
178136
}));
179137
}
180138

181-
static Stream<Arguments> limitedDamerauLevenshteinDistanceTestCases() {
182-
return Stream.of(
183-
Arguments.of("", "test", 10, 4),
184-
Arguments.of("test", "", 10, 4),
185-
Arguments.of("", "test", 2, -1),
186-
Arguments.of("test", "", 2, -1),
187-
Arguments.of("testing long string", "testing", 2, -1),
188-
Arguments.of("kitten", "sitting", 1, -1),
189-
Arguments.of("saturday", "sunday", 3, 3),
190-
Arguments.of("hello", "world", 6, 4),
191-
Arguments.of("algorithm", "logarithm", 1, -1),
192-
Arguments.of("computer", "comptuer", 1, 1),
193-
Arguments.of("receive", "recieve", 3, 1),
194-
Arguments.of("programming", "porgramming", 0, -1),
195-
Arguments.of("test", "tset", 1, 1),
196-
Arguments.of("example", "exmaple", 3, 1),
197-
Arguments.of("transform", "transfrom", 0, -1),
198-
Arguments.of("information", "infromation", 1, 1),
199-
Arguments.of("development", "developemnt", 3, 1),
200-
Arguments.of("password", "passwrod", 0, -1),
201-
Arguments.of("separate", "seperate", 1, 1),
202-
Arguments.of("definitely", "definately", 3, 1),
203-
Arguments.of("occurrence", "occurence", 0, -1),
204-
Arguments.of("necessary", "neccessary", 1, 1),
205-
Arguments.of("restaurant", "restaraunt", 4, 2),
206-
Arguments.of("beginning", "begining", 0, -1),
207-
Arguments.of("government", "goverment", 1, 1),
208-
Arguments.of("abcdefghijklmnop", "ponmlkjihgfedcba", 17, 15),
209-
Arguments.of("AAAAAAAAAA", "BBBBBBBBBB", 5, -1),
210-
Arguments.of("abababababab", "babababababa", 2, 2),
211-
Arguments.of("supercalifragilisticexpialidocious", "supercalifragilisticexpialidocous", 3, 1),
212-
Arguments.of("pneumonoultramicroscopicsilicovolcanoconiosiss", "pneumonoultramicroscopicsilicovolcanoconiosis", 0, -1),
213-
Arguments.of("abcdefg", "gfedcba", 6, 6),
214-
Arguments.of("xyxyxyxyxy", "yxyxyxyxyx", 4, 2),
215-
Arguments.of("aaaaabbbbbccccc", "cccccbbbbbaaaaa", 5, -1),
216-
Arguments.of("thequickbrownfoxjumpsoverthelazydog", "thequickbrownfoxjumpsovrethelazydog", 1, 1),
217-
Arguments.of("antidisestablishmentarianism", "antidisestablishmentarianisn", 3, 1)
218-
);
139+
@ParameterizedTest(name = "DamerauLevenshteinDistance.unlimitedCompare(\"{0}\", \"{1}\") should return {2}")
140+
@MethodSource("unlimitedDamerauLevenshteinDistanceTestCases")
141+
void testCalculateDamerauLevenshteinDistance(final String left, final String right, final int expectedDistance) {
142+
final int leftRightDistance = defaultInstance.apply(left, right);
143+
final int rightLeftDistance = defaultInstance.apply(right, left);
144+
assertEquals(expectedDistance, leftRightDistance);
145+
assertEquals(expectedDistance, rightLeftDistance);
146+
}
147+
148+
@ParameterizedTest(name = "DamerauLevenshteinDistance.limitedCompare(\"{0}\", \"{1}\") should return {2}")
149+
@MethodSource("limitedDamerauLevenshteinDistanceTestCases")
150+
void testCalculateDamerauLevenshteinDistance(final String left, final String right, final int threshold, final int expectedDistance) {
151+
final DamerauLevenshteinDistance instance = new DamerauLevenshteinDistance(threshold);
152+
final int leftRightDistance = instance.apply(left, right);
153+
final int rightLeftDistance = instance.apply(right, left);
154+
assertEquals(expectedDistance, leftRightDistance);
155+
assertEquals(expectedDistance, rightLeftDistance);
156+
}
157+
158+
@ParameterizedTest(name = "DamerauLevenshteinDistance.unlimitedCompare(\"{0}\", \"{1}\") should return {2} ({3})")
159+
@MethodSource("unlimitedDamerauLevenshteinDistanceTestCases_SimilarityInput")
160+
void testCalculateDamerauLevenshteinDistance_SimilarityInput(final String left, final String right, final int expectedDistance, final Class<?> cls) {
161+
final SimilarityInput<Object> leftInput = SimilarityInputTest.build(cls, left);
162+
final SimilarityInput<Object> rightInput = SimilarityInputTest.build(cls, right);
163+
final int leftRightDistance = defaultInstance.apply(leftInput, rightInput);
164+
final int rightLeftDistance = defaultInstance.apply(rightInput, leftInput);
165+
assertEquals(expectedDistance, leftRightDistance);
166+
assertEquals(expectedDistance, rightLeftDistance);
167+
}
168+
169+
@ParameterizedTest(name = "DamerauLevenshteinDistance.limitedCompare(\"{0}\", \"{1}\") should return {2}")
170+
@MethodSource("limitedDamerauLevenshteinDistanceTestCases_SimilarityInput")
171+
void testCalculateDamerauLevenshteinDistance_SimilarityInput(final String left, final String right, final int threshold, final int expectedDistance,
172+
final Class<?> cls) {
173+
final DamerauLevenshteinDistance instance = new DamerauLevenshteinDistance(threshold);
174+
final SimilarityInput<Object> leftInput = SimilarityInputTest.build(cls, left);
175+
final SimilarityInput<Object> rightInput = SimilarityInputTest.build(cls, right);
176+
final int leftRightDistance = instance.apply(leftInput, rightInput);
177+
final int rightLeftDistance = instance.apply(rightInput, leftInput);
178+
assertEquals(expectedDistance, leftRightDistance);
179+
assertEquals(expectedDistance, rightLeftDistance);
180+
}
181+
182+
@Test
183+
void testGetThresholdDirectlyAfterObjectInstantiation() {
184+
assertNull(defaultInstance.getThreshold());
185+
}
186+
187+
@Test
188+
void testGetThresholdIsCorrect() {
189+
final DamerauLevenshteinDistance distance = new DamerauLevenshteinDistance(10);
190+
assertEquals(10, distance.getThreshold());
191+
}
192+
193+
@Test
194+
void testInvalidThresholdThrows() {
195+
assertThrows(IllegalArgumentException.class, () -> new DamerauLevenshteinDistance(-1));
196+
}
197+
198+
@Test
199+
void testNullInputsThrowLimited() {
200+
final DamerauLevenshteinDistance instance = new DamerauLevenshteinDistance(10);
201+
assertThrows(IllegalArgumentException.class, () -> instance.apply(null, "test"));
202+
assertThrows(IllegalArgumentException.class, () -> instance.apply("test", null));
203+
assertThrows(IllegalArgumentException.class, () -> instance.apply(null, SimilarityInput.input("test")));
204+
assertThrows(IllegalArgumentException.class, () -> instance.apply(SimilarityInput.input("test"), null));
205+
}
206+
207+
@Test
208+
void testNullInputsThrowUnlimited() {
209+
assertThrows(IllegalArgumentException.class, () -> defaultInstance.apply(null, "test"));
210+
assertThrows(IllegalArgumentException.class, () -> defaultInstance.apply("test", null));
211+
assertThrows(IllegalArgumentException.class, () -> defaultInstance.apply(null, SimilarityInput.input("test")));
212+
assertThrows(IllegalArgumentException.class, () -> defaultInstance.apply(SimilarityInput.input("test"), null));
219213
}
220214
}

0 commit comments

Comments
 (0)