Skip to content

Commit c3f1262

Browse files
committed
rewrite CassandraData.parse to use Files.walkFileTree instead of Files.find
1 parent 5829b35 commit c3f1262

2 files changed

Lines changed: 160 additions & 15 deletions

File tree

src/main/java/com/instaclustr/esop/impl/CassandraData.java

Lines changed: 92 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
package com.instaclustr.esop.impl;
22

33
import static java.lang.String.format;
4-
import static java.util.stream.Collectors.groupingBy;
4+
import static java.util.stream.Collectors.toList;
55

6+
import java.io.IOException;
67
import java.nio.file.FileVisitResult;
78
import java.nio.file.Files;
89
import java.nio.file.Path;
@@ -17,6 +18,7 @@
1718
import java.util.Map.Entry;
1819
import java.util.Optional;
1920
import java.util.function.Predicate;
21+
import java.util.stream.Collectors;
2022

2123
import com.instaclustr.esop.impl.RenamedEntities.Renamed;
2224

@@ -158,27 +160,102 @@ public RenamedEntities getRenamedEntities() {
158160
return renamedEntities;
159161
}
160162

163+
public static class SnapshotsLister extends SimpleFileVisitor<Path> {
164+
165+
private boolean isDropped = false;
166+
167+
@Override
168+
public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
169+
if (dir.getParent().getFileName().toString().equals("snapshots")) {
170+
if (dir.getFileName().toString().startsWith("dropped-")) {
171+
isDropped = true;
172+
return FileVisitResult.TERMINATE;
173+
} else {
174+
return FileVisitResult.SKIP_SUBTREE;
175+
}
176+
} else {
177+
return FileVisitResult.CONTINUE;
178+
}
179+
}
180+
181+
public boolean isDropped() {
182+
return isDropped;
183+
}
184+
}
185+
186+
public static class KeyspaceTableLister extends SimpleFileVisitor<Path> {
187+
188+
private final Path cassandraDir;
189+
private final Map<Path, List<Path>> dataDirs = new HashMap<>();
190+
191+
public KeyspaceTableLister(final Path cassandraDir) {
192+
this.cassandraDir = cassandraDir;
193+
}
194+
195+
@Override
196+
public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
197+
// we hit keyspace
198+
if (dir.getParent().equals(cassandraDir)) {
199+
dataDirs.putIfAbsent(dir, new ArrayList<>());
200+
return FileVisitResult.CONTINUE;
201+
// we hit table
202+
} else if (dir.getParent().getParent().equals(cassandraDir)) {
203+
// detect if it is a dropped table
204+
Path snapshotsDir = dir.resolve("snapshots");
205+
if (Files.exists(snapshotsDir)) {
206+
SnapshotsLister snapshotsLister = new SnapshotsLister();
207+
Files.walkFileTree(snapshotsDir, snapshotsLister);
208+
if (!snapshotsLister.isDropped()) {
209+
dataDirs.get(dir.getParent()).add(dir);
210+
}
211+
} else {
212+
dataDirs.get(dir.getParent()).add(dir);
213+
}
214+
215+
return FileVisitResult.SKIP_SUBTREE;
216+
} else {
217+
return FileVisitResult.CONTINUE;
218+
}
219+
}
220+
221+
/**
222+
* Remove keyspaces which have 0 tables, it means that each table has a snapshot with "dropped-" snapshot name
223+
*/
224+
public void removeDroppedKeyspaces() {
225+
final List<Path> droppedKeyspaces = dataDirs
226+
.entrySet()
227+
.stream()
228+
.filter(entry -> entry.getValue().isEmpty())
229+
.map(Entry::getKey)
230+
.collect(toList());
231+
232+
for (final Path droppedKeyspace : droppedKeyspaces) {
233+
dataDirs.remove(droppedKeyspace);
234+
}
235+
}
236+
237+
public Map<Path, List<Path>> getDataDirs() {
238+
return dataDirs;
239+
}
240+
241+
@Override
242+
public String toString() {
243+
return dataDirs.toString();
244+
}
245+
}
246+
161247
public static CassandraData parse(final Path cassandraDir) throws Exception {
162248

163249
if (!Files.exists(cassandraDir)) {
164250
return CassandraData.empty();
165251
}
166252

167-
final Map<Path, List<Path>> dataDirs = Files.find(cassandraDir,
168-
2,
169-
(path, basicFileAttributes) -> basicFileAttributes.isDirectory() &&
170-
!path.getParent().equals(cassandraDir) &&
171-
!path.equals(cassandraDir))
172-
// take only these into consideration which do not have "snapshots/dropped-"
173-
.filter(table -> {
174-
try {
175-
return Files.find(table, 2, (p, b) -> b.isDirectory() && p.toString().contains("snapshots/dropped-")).count() == 0;
176-
} catch (final Exception ex) {
177-
return false;
178-
}
253+
final KeyspaceTableLister lister = new KeyspaceTableLister(cassandraDir);
254+
255+
Files.walkFileTree(cassandraDir, lister);
256+
lister.removeDroppedKeyspaces();
179257

180-
})
181-
.collect(groupingBy(Path::getParent));
258+
final Map<Path, List<Path>> dataDirs = lister.getDataDirs();
182259

183260
final Map<String, Map<String, String>> tableIdsMap = new HashMap<>();
184261

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package com.instaclustr.esop.backup;
2+
3+
import java.nio.file.Files;
4+
import java.nio.file.Path;
5+
import java.util.List;
6+
import java.util.Map;
7+
8+
import com.instaclustr.esop.impl.CassandraData.KeyspaceTableLister;
9+
import com.instaclustr.io.FileUtils;
10+
import org.slf4j.Logger;
11+
import org.slf4j.LoggerFactory;
12+
import org.testng.Assert;
13+
import org.testng.annotations.Test;
14+
15+
public class KeyspaceTableListerTest {
16+
17+
private static final Logger logger = LoggerFactory.getLogger(KeyspaceTableLister.class);
18+
19+
@Test
20+
public void testKeyspaceTableLister() throws Exception {
21+
Path cassandrDir = Files.createTempDirectory("keyspace-table-lister");
22+
23+
// in total 20000 files in 1000 tables in 100 keyspaces
24+
25+
for (int i = 0; i < 90; i++) {
26+
Files.createDirectory(cassandrDir.resolve("ks" + i));
27+
for (int j = 0; j < 10; j++) {
28+
Files.createDirectory(cassandrDir.resolve("ks" + i + "/tb-" + j));
29+
for (int k = 0; k < 20; k++) {
30+
Files.createFile(cassandrDir.resolve("ks" + i + "/tb-" + j + "/file" + k));
31+
}
32+
}
33+
}
34+
35+
for (int i = 0; i < 5; i++) {
36+
for (int j = 0; j < 5; j++) {
37+
Files.createDirectory(cassandrDir.resolve("ks" + i + "/tb-" + j + "/snapshots"));
38+
Files.createDirectory(cassandrDir.resolve("ks" + i + "/tb-" + j + "/snapshots/dropped-3424324"));
39+
}
40+
}
41+
42+
// keyspaces from no. 90 - 99 will have all tables dropped which renders them dropped too
43+
44+
for (int i = 90; i < 99; i++) {
45+
Files.createDirectory(cassandrDir.resolve("ks" + i));
46+
for (int j = 0; j < 5; j++) {
47+
Files.createDirectory(cassandrDir.resolve("ks" + i + "/tb-" + j));
48+
Files.createDirectory(cassandrDir.resolve("ks" + i + "/tb-" + j + "/snapshots"));
49+
Files.createDirectory(cassandrDir.resolve("ks" + i + "/tb-" + j + "/snapshots/dropped-3424324"));
50+
}
51+
}
52+
53+
KeyspaceTableLister lister = new KeyspaceTableLister(cassandrDir);
54+
Files.walkFileTree(cassandrDir, lister);
55+
56+
lister.removeDroppedKeyspaces();
57+
58+
Map<Path, List<Path>> dataDirs = lister.getDataDirs();
59+
Assert.assertFalse(dataDirs.isEmpty());
60+
61+
Assert.assertEquals(90, dataDirs.size());
62+
63+
// 25 tables are dropped and 10 keyspaces, each having 10 tables, have all tables dropped: 1000 - 100 - 25 = 875
64+
Assert.assertEquals(875, dataDirs.values().stream().map(List::size).reduce(0, Integer::sum).intValue());
65+
66+
FileUtils.deleteDirectory(cassandrDir);
67+
}
68+
}

0 commit comments

Comments
 (0)