Skip to content

Commit 931285d

Browse files
committed
multiple changes
* added scan problems column that sums scan errors, scan mem pauses, and scan mem returns
* added scan open files column and scan yield column
* fixed bug with computing cache hit ratio
* narrowed which stats are used to display specific metric types
* cleaned up some errors in the Metric enum
1 parent 08a2dc1 commit 931285d

6 files changed

Lines changed: 122 additions & 19 deletions

File tree

core/src/main/java/org/apache/accumulo/core/metrics/Metric.java

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,10 @@ public enum Metric {
8383
MetricDocSection.COMPACTION, "Majc In Progress", null, NUMBER),
8484
COMPACTOR_MAJC_STUCK("accumulo.compaction.majc.stuck", MetricType.LONG_TASK_TIMER,
8585
"Number and duration of stuck major compactions.", MetricDocSection.COMPACTION, "Majc Stuck",
86-
null, NUMBER),
86+
null, DURATION),
8787
COMPACTOR_MINC_STUCK("accumulo.compaction.minc.stuck", MetricType.LONG_TASK_TIMER,
8888
"Number and duration of stuck minor compactions.", MetricDocSection.COMPACTION, "Minc Stuck",
89-
null, NUMBER),
89+
null, DURATION),
9090
COMPACTOR_ENTRIES_READ("accumulo.compaction.entries.read", MetricType.FUNCTION_COUNTER,
9191
"Number of entries read by all compactions that have run on this compactor (majc) or tserver (minc).",
9292
MetricDocSection.COMPACTION, "Compaction Entries Read", null, NUMBER),
@@ -122,7 +122,7 @@ public enum Metric {
122122
MetricDocSection.COMPACTION, "Compaction Queue Avg Job Age", null, NUMBER),
123123
COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_POLL_TIMER("accumulo.compaction.queue.jobs.exit.time",
124124
MetricType.TIMER, "Tracks time a job spent in the queue before exiting the queue.",
125-
MetricDocSection.COMPACTION, "Compaction Queue Job Time Queued", null, NUMBER),
125+
MetricDocSection.COMPACTION, "Compaction Queue Job Time Queued", null, DURATION),
126126

127127
// Fate Metrics
128128
FATE_TYPE_IN_PROGRESS("accumulo.fate.ops.in.progress.by.type", MetricType.GAUGE,
@@ -242,11 +242,11 @@ public enum Metric {
242242
// Scan Server Metrics
243243
SCAN_RESERVATION_TOTAL_TIMER("accumulo.scan.reservation.total.timer", MetricType.TIMER,
244244
"Average time to reserve a tablet's files for scan.", MetricDocSection.SCAN_SERVER,
245-
"Mean Reservation", null, NUMBER),
245+
"Mean Reservation", null, DURATION),
246246
SCAN_RESERVATION_WRITEOUT_TIMER("accumulo.scan.reservation.writeout.timer", MetricType.TIMER,
247247
"Time to write out a tablets file reservations for scan.", MetricDocSection.SCAN_SERVER,
248-
"Scan Reservation Write Time", null, NUMBER),
249-
SCAN_RESERVATION_FILES("accumulo.scan.reservation.files", MetricType.TIMER,
248+
"Scan Reservation Write Time", null, DURATION),
249+
SCAN_RESERVATION_FILES("accumulo.scan.reservation.files", MetricType.GAUGE,
250250
"The number of files reserved by a scan server.", MetricDocSection.SCAN_SERVER,
251251
"Files Reserved", null, NUMBER),
252252
SCAN_RESERVATION_CONFLICT_COUNTER("accumulo.scan.reservation.conflict.count", MetricType.COUNTER,
@@ -261,7 +261,7 @@ public enum Metric {
261261

262262
// Scan Metrics
263263
SCAN_TIMES("accumulo.scan.times", MetricType.TIMER, "Scan session lifetime (creation to close).",
264-
MetricDocSection.SCAN, "Scan Session Total Time", null, NUMBER),
264+
MetricDocSection.SCAN, "Scan Session Total Time", null, DURATION),
265265
SCAN_OPEN_FILES("accumulo.scan.files.open", MetricType.GAUGE, "Number of files open for scans.",
266266
MetricDocSection.SCAN, "Scan Files Open", null, NUMBER),
267267
SCAN_RESULTS("accumulo.scan.result", MetricType.DISTRIBUTION_SUMMARY, "Results per scan.",
@@ -310,10 +310,11 @@ public enum Metric {
310310
// Minor Compaction Metrics
311311
MINC_QUEUED("accumulo.compaction.minc.queued", MetricType.TIMER,
312312
"Queued minor compactions time queued.", MetricDocSection.COMPACTION, "Minc Queued", null,
313-
NUMBER),
313+
DURATION),
314314
MINC_RUNNING("accumulo.compaction.minc.running", MetricType.TIMER,
315-
"Minor compactions time active.", MetricDocSection.COMPACTION, "Minc Running", null, NUMBER),
316-
MINC_PAUSED("accumulo.compaction.minc.paused", MetricType.COUNTER,
315+
"Minor compactions time active.", MetricDocSection.COMPACTION, "Minc Running", null,
316+
DURATION),
317+
MINC_PAUSED("accumulo.compaction.minc.paused", MetricType.FUNCTION_COUNTER,
317318
"Number of paused minor compactions.", MetricDocSection.COMPACTION, "Minc Paused", null,
318319
NUMBER),
319320

@@ -323,19 +324,19 @@ public enum Metric {
323324
MetricDocSection.TABLET_SERVER, "Ingest Errors", null, NUMBER),
324325
UPDATE_LOCK("accumulo.updates.lock", MetricType.TIMER,
325326
"Average time taken for conditional mutation to get a row lock.",
326-
MetricDocSection.TABLET_SERVER, "Conditional Mutation Row Lock Wait Time", null, NUMBER),
327+
MetricDocSection.TABLET_SERVER, "Conditional Mutation Row Lock Wait Time", null, DURATION),
327328
UPDATE_CHECK("accumulo.updates.check", MetricType.TIMER,
328329
"Average time taken for conditional mutation to check conditions.",
329-
MetricDocSection.TABLET_SERVER, "Conditional Mutation Condition Check Time", null, NUMBER),
330+
MetricDocSection.TABLET_SERVER, "Conditional Mutation Condition Check Time", null, DURATION),
330331
UPDATE_COMMIT("accumulo.updates.commit", MetricType.TIMER,
331332
"Average time taken to commit a mutation.", MetricDocSection.TABLET_SERVER,
332-
"Mutation Commit Avg Total Time", null, NUMBER),
333+
"Mutation Commit Avg Total Time", null, DURATION),
333334
UPDATE_COMMIT_PREP("accumulo.updates.commit.prep", MetricType.TIMER,
334335
"Average time taken to prepare to commit a single mutation.", MetricDocSection.TABLET_SERVER,
335-
"Mutation Commit Avg Prep Time", null, NUMBER),
336+
"Mutation Commit Avg Prep Time", null, DURATION),
336337
UPDATE_WALOG_WRITE("accumulo.updates.walog.write", MetricType.TIMER,
337338
"Time taken to write a batch of mutations to WAL.", MetricDocSection.TABLET_SERVER,
338-
"WAL Write Time", null, NUMBER),
339+
"WAL Write Time", null, DURATION),
339340
UPDATE_MUTATION_ARRAY_SIZE("accumulo.updates.mutation.arrays.size",
340341
MetricType.DISTRIBUTION_SUMMARY, "Batch size of mutations from client.",
341342
MetricDocSection.TABLET_SERVER, "Mutation Batch Size", null, NUMBER),

server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/CacheSizeColumnFactory.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,15 @@
2121
import java.util.List;
2222
import java.util.Map;
2323

24-
import com.google.common.base.Preconditions;
2524
import org.apache.accumulo.core.client.admin.servers.ServerId;
2625
import org.apache.accumulo.core.metrics.Metric;
2726
import org.apache.accumulo.core.metrics.flatbuffers.FMetric;
2827
import org.apache.accumulo.core.metrics.flatbuffers.FTag;
2928
import org.apache.accumulo.core.process.thrift.MetricResponse;
3029
import org.apache.accumulo.monitor.next.SystemInformation;
3130

31+
import com.google.common.base.Preconditions;
32+
3233
public class CacheSizeColumnFactory implements ColumnFactory {
3334

3435
private final String cacheName;

server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MetricColumnFactory.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.Arrays;
2222
import java.util.List;
2323
import java.util.Map;
24+
import java.util.function.Predicate;
2425
import java.util.stream.Collectors;
2526

2627
import org.apache.accumulo.core.client.admin.servers.ServerId;
@@ -34,6 +35,7 @@ public class MetricColumnFactory implements ColumnFactory {
3435

3536
private final Column column;
3637
private final boolean computeRate;
38+
private final Predicate<String> statPredicate;
3739

3840
MetricColumnFactory(Metric metric) {
3941
String classes;
@@ -51,6 +53,14 @@ public class MetricColumnFactory implements ColumnFactory {
5153
}
5254
this.column = new Column(metric.getName(), metric.getColumnHeader(),
5355
metric.getColumnDescription(), classes);
56+
57+
statPredicate = switch (metric.getType()) {
58+
case GAUGE -> sv -> sv.equals(StatType.VALUE);
59+
case COUNTER, FUNCTION_COUNTER -> sv -> sv.equals(StatType.COUNT);
60+
case TIMER, DISTRIBUTION_SUMMARY -> sv -> sv.equals(StatType.AVERAGE);
61+
case LONG_TASK_TIMER -> sv -> sv.equals(StatType.MAX);
62+
case CACHE -> StatType.COUNT_OR_VALUE; // TODO this class does not really support this type
63+
};
5464
}
5565

5666
@Override
@@ -61,7 +71,7 @@ public Column getColumn() {
6171
@Override
6272
public Object getRowData(ServerId sid, MetricResponse mr,
6373
Map<String,List<FMetric>> serverMetrics) {
64-
var sum = sum(serverMetrics.getOrDefault(column.key(), List.of()), StatType.COUNT_OR_VALUE);
74+
var sum = sum(serverMetrics.getOrDefault(column.key(), List.of()), statPredicate);
6575
if (computeRate) {
6676
return computeRate(sum);
6777
} else {
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* https://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.accumulo.monitor.next.views;
20+
21+
import static java.nio.charset.StandardCharsets.UTF_8;
22+
23+
import java.util.Arrays;
24+
import java.util.List;
25+
import java.util.Map;
26+
import java.util.Set;
27+
28+
import org.apache.accumulo.core.client.admin.servers.ServerId;
29+
import org.apache.accumulo.core.metrics.Metric;
30+
import org.apache.accumulo.core.metrics.flatbuffers.FMetric;
31+
import org.apache.accumulo.core.process.thrift.MetricResponse;
32+
33+
import com.google.common.base.Preconditions;
34+
import com.google.common.hash.Hashing;
35+
36+
/**
37+
* Sums multiple metrics of the same type for a column value
38+
*/
39+
public class MultiSumColumnFactory implements ColumnFactory {
40+
41+
private final List<MetricColumnFactory> colFactories;
42+
private final TableData.Column column;
43+
44+
MultiSumColumnFactory(String label, Metric... metrics) {
45+
Preconditions.checkArgument(metrics.length > 1);
46+
// ensure all metrics are of the same type
47+
Preconditions
48+
.checkArgument(Arrays.stream(metrics).allMatch(m -> m.getType() == metrics[0].getType()));
49+
// ensure all metrics have the same display type
50+
Preconditions.checkArgument(
51+
Arrays.stream(metrics).allMatch(m -> Set.of(Arrays.asList(m.getColumnClasses()))
52+
.equals(Set.of(Arrays.asList(metrics[0].getColumnClasses())))));
53+
54+
this.colFactories = Arrays.stream(metrics).map(MetricColumnFactory::new).toList();
55+
56+
StringBuilder description = new StringBuilder("A sum of the following metrics :");
57+
var hasher = Hashing.sha256().newHasher();
58+
for (int i = 0; i < metrics.length; i++) {
59+
description.append(" ");
60+
description.append((i + 1) + ") " + metrics[i].getDescription());
61+
if (!metrics[i].getDescription().endsWith(".")) {
62+
description.append(".");
63+
}
64+
hasher.putString(metrics[i].getName(), UTF_8);
65+
}
66+
67+
var key = hasher.hash().toString();
68+
69+
this.column = new TableData.Column(key, label, description.toString(),
70+
colFactories.get(0).getColumn().uiClass());
71+
}
72+
73+
@Override
74+
public TableData.Column getColumn() {
75+
return column;
76+
}
77+
78+
@Override
79+
public Object getRowData(ServerId sid, MetricResponse mr,
80+
Map<String,List<FMetric>> serverMetrics) {
81+
Number sum = null;
82+
for (var colf : colFactories) {
83+
sum = add(sum, (Number) colf.getRowData(sid, mr, serverMetrics));
84+
}
85+
return sum;
86+
}
87+
}

server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/RatioColumnFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public Object getRowData(ServerId sid, MetricResponse mr,
5757
}
5858

5959
var numeratorSum = sum(n, StatType.COUNT_OR_VALUE).doubleValue();
60-
var denominatorSum = sum(n, StatType.COUNT_OR_VALUE).doubleValue();
60+
var denominatorSum = sum(d, StatType.COUNT_OR_VALUE).doubleValue();
6161

6262
if (denominatorSum == 0) {
6363
return null;

server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/TableDataFactory.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ public static class StatType {
8383
public static final String COUNT = "count";
8484
public static final String VALUE = "value";
8585
public static final String AVERAGE = "avg";
86+
public static final String MAX = "max";
8687

8788
public static final Predicate<String> COUNT_OR_VALUE = s -> COUNT.equals(s) || VALUE.equals(s);
8889
}
@@ -251,7 +252,8 @@ private static void scanColumns(List<ColumnFactory> cols) {
251252
cols.add(new ExecutorColumnFactory(ExecutorColumnFactory.Type.QUEUED,
252253
ThreadPoolNames.SCAN_EXECUTOR_PREFIX.poolName, "Queued scans",
253254
"Scan task queued on all scan thread pools"));
254-
cols.add(new MetricColumnFactory(Metric.SCAN_ERRORS));
255+
cols.add(new MultiSumColumnFactory("Scan Problems", Metric.SCAN_ERRORS,
256+
Metric.SCAN_PAUSED_FOR_MEM, Metric.SCAN_RETURN_FOR_MEM));
255257
cols.add(new MetricColumnFactory(Metric.SCAN_SCANNED_ENTRIES));
256258
cols.add(new MetricColumnFactory(Metric.SCAN_QUERY_SCAN_RESULTS));
257259
cols.add(new MetricColumnFactory(Metric.SCAN_QUERY_SCAN_RESULTS_BYTES));
@@ -261,6 +263,8 @@ private static void scanColumns(List<ColumnFactory> cols) {
261263
cols.add(new RatioColumnFactory("Data cache hit",
262264
"Ratio of hits/total request for the data block cache", Metric.BLOCKCACHE_DATA_HITCOUNT,
263265
Metric.BLOCKCACHE_DATA_REQUESTCOUNT));
266+
cols.add(new MetricColumnFactory(Metric.SCAN_OPEN_FILES));
267+
cols.add(new MetricColumnFactory(Metric.SCAN_YIELDS));
264268
}
265269

266270
private static void scanServerColumns(List<ColumnFactory> cols) {

0 commit comments

Comments
 (0)