Skip to content

Commit 0178047

Browse files
OAK-11478 Node store statistics: support the tree store (#2071)
* OAK-11478 Node store statistics: support the tree store
* OAK-11478 Node store statistics: support the tree store
* OAK-11478 Node store statistics: support the tree store
1 parent 64813b0 commit 0178047

File tree

13 files changed

+175
-820
lines changed

13 files changed

+175
-820
lines changed

oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/Profiler.java

+7-2
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@ public class Profiler implements Runnable {
5757
"sun," +
5858
"com.sun.," +
5959
"com.mongodb.," +
60+
"org.apache.jackrabbit.oak.commons.Profiler.," +
6061
"org.bson.,"
6162
).split(",");
6263
private final String[] ignorePackages = (
@@ -380,15 +381,19 @@ private void tick() {
380381

381382
private void processList(List<Object[]> list) {
382383
for (Object[] dump : list) {
383-
if (startsWithAny(dump[0].toString(), ignoreThreads)) {
384+
String el = dump[0].toString();
385+
if (el.startsWith("app//")) {
386+
el = el.substring("app//".length());
387+
}
388+
if (startsWithAny(el, ignoreThreads)) {
384389
continue;
385390
}
386391
StringBuilder buff = new StringBuilder();
387392
// simple recursive calls are ignored
388393
String last = null;
389394
boolean packageCounts = false;
390395
for (int j = 0, i = 0; i < dump.length && j < depth; i++) {
391-
String el = dump[i].toString();
396+
el = dump[i].toString();
392397
if (el.startsWith("app//")) {
393398
el = el.substring("app//".length());
394399
}

oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java

+31-33
Original file line number | Diff line number | Diff line change
@@ -34,54 +34,52 @@
3434
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.modules.TopLargestBinaries;
3535
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeData;
3636
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeDataReader;
37-
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeLineReader;
38-
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeStreamReader;
39-
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeStreamReaderCompressed;
37+
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeTreeStoreReader;
4038

4139
/**
4240
* Builder for commonly used statistics for flat file stores.
4341
*/
4442
public class StatsBuilder {
4543

46-
private static final boolean ONLY_READ = false;
47-
48-
/**
49-
* Read a flat file store and build statistics.
50-
*
51-
* @param args the file name
52-
*/
5344
public static void main(String... args) throws Exception {
5445
String fileName = null;
5546
String nodeNameFilter = null;
56-
boolean stream = false;
57-
boolean compressedStream = false;
47+
boolean profiler = false;
5848
for(int i = 0; i<args.length; i++) {
5949
String a = args[i];
6050
if (a.equals("--fileName")) {
6151
fileName = args[++i];
6252
} else if (a.equals("--nodeNameFilter")) {
6353
nodeNameFilter = args[++i];
64-
} else if (a.endsWith("--stream")) {
65-
stream = true;
66-
} else if (a.equals("--compressedStream")) {
67-
compressedStream = true;
54+
} else if (a.equals("--profiler")) {
55+
profiler = true;
6856
}
6957
}
7058
if (fileName == null) {
7159
System.out.println("Command line arguments:");
7260
System.out.println(" --fileName <file name> (flat file store file name; mandatory)");
7361
System.out.println(" --nodeNameFilter <filter> (node name filter for binaries; optional)");
74-
System.out.println(" --stream (use a stream file; optional)");
75-
System.out.println(" --compressedStream (use a compressed stream file; optional)");
62+
System.out.println(" --profiler (enable the build-in profiler; optional)");
7663
return;
7764
}
65+
buildStats(fileName, nodeNameFilter, profiler);
66+
}
67+
68+
/**
69+
* Read a flat file store and build statistics.
70+
*
71+
* @param fileName the file name
72+
* @param nodeNameFilter the node names to filter
73+
* @param profiler also run the profiler
74+
*/
75+
public static void buildStats(String fileName, String nodeNameFilter, boolean profiler) throws IOException {
7876
System.out.println("Processing " + fileName);
7977
ListCollector collectors = new ListCollector();
8078
collectors.add(new NodeCount(1000, 1));
8179
PropertyStats ps = new PropertyStats(false, 1);
8280
collectors.add(ps);
8381
collectors.add(new NodeTypeCount());
84-
if (nodeNameFilter != null) {
82+
if (nodeNameFilter != null && !nodeNameFilter.isEmpty()) {
8583
collectors.add(new NodeNameFilter(nodeNameFilter, new BinarySize(false, 1)));
8684
collectors.add(new NodeNameFilter(nodeNameFilter, new BinarySize(true, 1)));
8785
collectors.add(new NodeNameFilter(nodeNameFilter, new BinarySizeHistogram(1)));
@@ -94,18 +92,16 @@ public static void main(String... args) throws Exception {
9492
collectors.add(new DistinctBinarySizeHistogram(1));
9593
collectors.add(new DistinctBinarySize(16, 16));
9694

97-
Profiler prof = new Profiler().startCollecting();
98-
NodeDataReader reader;
99-
if (compressedStream) {
100-
reader = NodeStreamReaderCompressed.open(fileName);
101-
} else if (stream) {
102-
reader = NodeStreamReader.open(fileName);
103-
} else {
104-
reader = NodeLineReader.open(fileName);
95+
Profiler prof = null;
96+
if (profiler) {
97+
prof = new Profiler().startCollecting();
10598
}
99+
NodeDataReader reader = NodeTreeStoreReader.open(fileName);
106100
collect(reader, collectors);
107101

108-
System.out.println(prof.getTop(10));
102+
if (profiler) {
103+
System.out.println(prof.getTop(10));
104+
}
109105
System.out.println();
110106
System.out.println("Results");
111107
System.out.println();
@@ -123,11 +119,13 @@ private static void collect(NodeDataReader reader, StatsCollector collector) thr
123119
if (node == null) {
124120
break;
125121
}
126-
if (++lineCount % 1000000 == 0) {
127-
System.out.println(lineCount + " lines; " + reader.getProgressPercent() + "%");
128-
}
129-
if (ONLY_READ) {
130-
continue;
122+
if (++lineCount % 1_000_000 == 0) {
123+
String msg = lineCount + " entries";
124+
int progressPercent = reader.getProgressPercent();
125+
if (progressPercent != 0) {
126+
msg += "; " + progressPercent + "%";
127+
}
128+
System.out.println(msg);
131129
}
132130
if (last != null) {
133131
while (last != null && last.getPathElements().size() >= node.getPathElements().size()) {

oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java

+4-4
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ public void add(NodeData node) {
106106
referenceCount += list.size();
107107
for(BinaryId id : list) {
108108
referenceSize += id.getLength();
109-
if (largeBinariesCountMax > 0 && id.getLength() >= largeBinarySizeThreshold) {
109+
if (largeBinariesCountMax > 0 && id.getLength() > largeBinarySizeThreshold) {
110110
largeBinaries.add(id);
111111
truncateLargeBinariesSet();
112112
} else {
@@ -132,15 +132,15 @@ private void truncateLargeBinariesSet() {
132132
}
133133
long[] lengths = new long[largeBinaries.size()];
134134
int i = 0;
135-
for(BinaryId id : largeBinaries) {
135+
for (BinaryId id : largeBinaries) {
136136
lengths[i++] = id.getLength();
137137
}
138138
Arrays.sort(lengths);
139139
// the new threshold is the median of all the lengths
140140
largeBinarySizeThreshold = lengths[largeBinariesCountMax];
141-
for(Iterator<BinaryId> it = largeBinaries.iterator(); it.hasNext();) {
141+
for (Iterator<BinaryId> it = largeBinaries.iterator(); it.hasNext();) {
142142
BinaryId id = it.next();
143-
if (id.getLength() < largeBinarySizeThreshold) {
143+
if (id.getLength() <= largeBinarySizeThreshold) {
144144
addToBloomFilter(id);
145145
it.remove();
146146
}

oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/stream/NodeStreamConverter.java

-103
This file was deleted.

0 commit comments

Comments
 (0)