Skip to content

Commit 9cf0365

Browse files
benmccann and ccleva authored
Add Row.equals/Row.hashCode. Fix Table.isDuplicate when row hashes collide (#1267)
Co-authored-by: ccleva <[email protected]>
1 parent 5efb947 commit 9cf0365

File tree

2 files changed: 25 additions and 58 deletions

core/src/main/java/tech/tablesaw/api/Row.java

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -809,8 +809,29 @@ public Column<?> column(int columnIndex) {
809809
return tableSlice.column(columnIndex);
810810
}
811811

812+
/** Returns true if every value is equal to the corresponding value in the given row */
813+
@Override
814+
public boolean equals(Object obj) {
815+
if (obj == null || obj.getClass() != this.getClass()) {
816+
return false;
817+
}
818+
819+
Row other = (Row) obj;
820+
if (columnCount() != other.columnCount()) {
821+
return false;
822+
}
823+
824+
for (int columnIndex = 0; columnIndex < columnCount(); columnIndex++) {
825+
if (!column(columnIndex).equals(getRowNumber(), other.getRowNumber())) {
826+
return false;
827+
}
828+
}
829+
return true;
830+
}
831+
812832
/** Returns a hash computed on the values in the backing table at this row */
813-
public int rowHash() {
833+
@Override
834+
public int hashCode() {
814835
int[] values = new int[columnCount()];
815836
for (int i = 0; i < columnCount(); i++) {
816837
Column<?> column = tableSlice.column(i);

core/src/main/java/tech/tablesaw/api/Table.java

Lines changed: 3 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -545,25 +545,6 @@ public static boolean compareRows(int rowNumber, Table table1, Table table2) {
545545
return true;
546546
}
547547

548-
/**
549-
* Returns true if every value in row1 is equal to the same value in row2, where row1 and row2 are
550-
* both rows from this table
551-
*/
552-
private boolean duplicateRows(Row row1, Row row2) {
553-
if (row1.columnCount() != row2.columnCount()) {
554-
return false;
555-
}
556-
boolean result;
557-
for (int columnIndex = 0; columnIndex < row1.columnCount(); columnIndex++) {
558-
Column<?> c = column(columnIndex);
559-
result = c.equals(row1.getRowNumber(), row2.getRowNumber());
560-
if (!result) {
561-
return false;
562-
}
563-
}
564-
return true;
565-
}
566-
567548
public Table[] sampleSplit(double table1Proportion) {
568549
Table[] tables = new Table[2];
569550
int table1Count = (int) Math.round(rowCount() * table1Proportion);
@@ -931,52 +912,17 @@ public TableSliceGroup splitOn(CategoricalColumn<?>... columns) {
931912
* this table, appears only once in the returned table.
932913
*/
933914
public Table dropDuplicateRows() {
934-
935915
Table temp = emptyCopy();
936-
Int2ObjectMap<IntArrayList> uniqueHashes = new Int2ObjectOpenHashMap<>();
937-
// ListMultimap<Integer, Integer> uniqueHashes = ArrayListMultimap.create();
916+
Set uniqueRows = new HashSet<>();
938917
for (Row row : this) {
939-
if (!isDuplicate(row, uniqueHashes)) {
918+
if (!uniqueRows.contains(row)) {
919+
uniqueRows.add(row);
940920
temp.append(row);
941921
}
942922
}
943923
return temp;
944924
}
945925

946-
/**
947-
* Returns true if all the values in row are identical to those in another row previously seen and
948-
* recorded in the list.
949-
*
950-
* @param row the row to evaluate
951-
* @param uniqueHashes a map of row hashes to the id of an exemplar row that produces that hash.
952-
* If two different rows produce the same hash, then the row number for each is placed in the
953-
* list, so that there are exemplars for both
954-
* @return true if the row's values exactly match a row that was previously seen
955-
*/
956-
private boolean isDuplicate(Row row, Int2ObjectMap<IntArrayList> uniqueHashes) {
957-
int hash = row.rowHash();
958-
if (!uniqueHashes.containsKey(hash)) {
959-
IntArrayList rowNumbers = new IntArrayList();
960-
rowNumbers.add(row.getRowNumber());
961-
uniqueHashes.put(hash, rowNumbers);
962-
return false;
963-
}
964-
965-
// the hashmap contains the hash, make sure the actual row values match
966-
IntArrayList matchingKeys = uniqueHashes.get(hash);
967-
968-
for (int key : matchingKeys) {
969-
Row oldRow = this.row(key);
970-
if (duplicateRows(row, oldRow)) {
971-
return true;
972-
} else {
973-
uniqueHashes.get(hash).add(row.getRowNumber());
974-
return false;
975-
}
976-
}
977-
return true;
978-
}
979-
980926
/** Returns only those records in this table that have no columns with missing values */
981927
public Table dropRowsWithMissingValues() {
982928

0 commit comments

Comments
 (0)