Skip to content

Commit 6f8dfa1

Browse files
committed
RATIS-2486. Add detailed zero-copy metrics for gRPC message types and fallback tracking.
1 parent d5bd9c4 commit 6f8dfa1

4 files changed

Lines changed: 127 additions & 9 deletions

File tree

ratis-grpc/src/main/java/org/apache/ratis/grpc/metrics/ZeroCopyMetrics.java

Lines changed: 58 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,20 @@ public class ZeroCopyMetrics extends RatisMetrics {
3535
private final LongCounter nonZeroCopyMessages = getRegistry().counter("num_non_zero_copy_messages");
3636
private final LongCounter releasedMessages = getRegistry().counter("num_released_messages");
3737

38+
// Per-message-type zero-copy counters. Each one is incremented by the matching
// onZeroCopyAppendEntries / onZeroCopyInstallSnapshot / onZeroCopyClientRequest callback,
// which also forwards to onZeroCopyMessage.
39+
private final LongCounter zeroCopyAppendEntries = getRegistry().counter("num_zero_copy_append_entries");
40+
private final LongCounter zeroCopyInstallSnapshot = getRegistry().counter("num_zero_copy_install_snapshot");
41+
private final LongCounter zeroCopyClientRequest = getRegistry().counter("num_zero_copy_client_request");
42+
43+
// Aggregated savings and parse time (nanos) for zero-copy path.
// Both are running totals: ZeroCopyMessageMarshallerMetrics.onZeroCopyParse adds the
// reported byte count and elapsed nanoseconds on every call.
44+
private final LongCounter bytesSavedByZeroCopy = getRegistry().counter("bytes_saved_by_zero_copy");
45+
private final LongCounter zeroCopyParseTimeNanos = getRegistry().counter("zero_copy_parse_time_nanos");
46+
47+
// Reason counters for zero-copy fallback.
// One counter per onFallback* callback of ZeroCopyMessageMarshallerMetrics.
48+
private final LongCounter fallbackNotKnownLength = getRegistry().counter("zero_copy_fallback_not_known_length");
49+
private final LongCounter fallbackNotDetachable = getRegistry().counter("zero_copy_fallback_not_detachable");
50+
private final LongCounter fallbackNotByteBuffer = getRegistry().counter("zero_copy_fallback_not_byte_buffer");
51+
3852
/**
 * Creates the metrics object, passing the registry returned by
 * {@code createRegistry()} to the superclass constructor.
 */
public ZeroCopyMetrics() {
3953
super(createRegistry());
4054
}
@@ -54,6 +68,21 @@ public void onZeroCopyMessage(AbstractMessage ignored) {
5468
zeroCopyMessages.inc();
5569
}
5670

71+
/**
 * Records one zero-copy appendEntries message: forwards to
 * {@link #onZeroCopyMessage} and then increments the appendEntries-specific counter.
 *
 * @param ignored the message; only passed through to {@code onZeroCopyMessage}
 */
public void onZeroCopyAppendEntries(AbstractMessage ignored) {
72+
onZeroCopyMessage(ignored);
73+
zeroCopyAppendEntries.inc();
74+
}
75+
76+
/**
 * Records one zero-copy installSnapshot message: forwards to
 * {@link #onZeroCopyMessage} and then increments the installSnapshot-specific counter.
 *
 * @param ignored the message; only passed through to {@code onZeroCopyMessage}
 */
public void onZeroCopyInstallSnapshot(AbstractMessage ignored) {
77+
onZeroCopyMessage(ignored);
78+
zeroCopyInstallSnapshot.inc();
79+
}
80+
81+
/**
 * Records one zero-copy client request: forwards to
 * {@link #onZeroCopyMessage} and then increments the client-request-specific counter.
 *
 * @param ignored the message; only passed through to {@code onZeroCopyMessage}
 */
public void onZeroCopyClientRequest(AbstractMessage ignored) {
82+
onZeroCopyMessage(ignored);
83+
zeroCopyClientRequest.inc();
84+
}
85+
5786
/**
 * Increments the counter of messages that did not take the zero-copy path.
 *
 * @param ignored the message; not read by this method
 */
public void onNonZeroCopyMessage(AbstractMessage ignored) {
5887
nonZeroCopyMessages.inc();
5988
}
@@ -62,6 +91,34 @@ public void onReleasedMessage(AbstractMessage ignored) {
6291
releasedMessages.inc();
6392
}
6493

94+
/**
 * Creates a new {@link ZeroCopyMessageMarshallerMetrics} adapter bound to this
 * instance. Each call returns a fresh adapter object; all adapters update the
 * counters of this same {@code ZeroCopyMetrics}.
 *
 * @return a new adapter
 */
public ZeroCopyMessageMarshallerMetrics newMarshallerMetrics() {
95+
return new ZeroCopyMessageMarshallerMetrics();
96+
}
97+
98+
// Adapter used by ZeroCopyMessageMarshaller to report parse stats and fallback reasons.
99+
/**
 * Inner (non-static) implementation of {@code ZeroCopyMessageMarshaller.Metrics}
 * that maps every callback onto a counter of the enclosing {@code ZeroCopyMetrics}.
 */
public class ZeroCopyMessageMarshallerMetrics implements org.apache.ratis.grpc.util.ZeroCopyMessageMarshaller.Metrics {
100+
/**
 * Adds the reported values to the running totals.
 *
 * @param bytesSaved amount added to the bytes-saved counter
 * @param parseTimeNanos amount added to the parse-time counter, in nanoseconds
 */
@Override
101+
public void onZeroCopyParse(long bytesSaved, long parseTimeNanos) {
102+
bytesSavedByZeroCopy.inc(bytesSaved);
103+
zeroCopyParseTimeNanos.inc(parseTimeNanos);
104+
}
105+
106+
/** Counts one fallback reported as "not known length". */
@Override
107+
public void onFallbackNotKnownLength() {
108+
fallbackNotKnownLength.inc();
109+
}
110+
111+
/** Counts one fallback reported as "not detachable". */
@Override
112+
public void onFallbackNotDetachable() {
113+
fallbackNotDetachable.inc();
114+
}
115+
116+
/** Counts one fallback reported as "not byte buffer". */
@Override
117+
public void onFallbackNotByteBuffer() {
118+
fallbackNotByteBuffer.inc();
119+
}
120+
}
121+
65122
@VisibleForTesting
66123
public long zeroCopyMessages() {
67124
return zeroCopyMessages.getCount();
@@ -76,4 +133,4 @@ public long nonZeroCopyMessages() {
76133
public long releasedMessages() {
77134
return releasedMessages.getCount();
78135
}
79-
}
136+
}

ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcClientProtocolService.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,8 @@ void closeAllExisting(RaftGroupId groupId) {
161161
this.executor = executor;
162162
this.zeroCopyEnabled = zeroCopyEnabled;
163163
this.zeroCopyRequestMarshaller = new ZeroCopyMessageMarshaller<>(RaftClientRequestProto.getDefaultInstance(),
164-
zeroCopyMetrics::onZeroCopyMessage, zeroCopyMetrics::onNonZeroCopyMessage, zeroCopyMetrics::onReleasedMessage);
164+
zeroCopyMetrics::onZeroCopyClientRequest, zeroCopyMetrics::onNonZeroCopyMessage,
165+
zeroCopyMetrics::onReleasedMessage, zeroCopyMetrics.newMarshallerMetrics());
165166
zeroCopyMetrics.addUnreleased("client_protocol", zeroCopyRequestMarshaller::getUnclosedCount);
166167
}
167168

ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServerProtocolService.java

Lines changed: 34 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@
4747

4848
import static org.apache.ratis.grpc.GrpcUtil.addMethodWithCustomMarshaller;
4949
import static org.apache.ratis.proto.grpc.RaftServerProtocolServiceGrpc.getAppendEntriesMethod;
50+
import static org.apache.ratis.proto.grpc.RaftServerProtocolServiceGrpc.getInstallSnapshotMethod;
5051

5152
class GrpcServerProtocolService extends RaftServerProtocolServiceImplBase {
5253
public static final Logger LOG = LoggerFactory.getLogger(GrpcServerProtocolService.class);
@@ -59,10 +60,12 @@ private enum BatchLogKey implements BatchLogger.Key {
5960
static class PendingServerRequest<REQUEST> {
6061
private final AtomicReference<ReferenceCountedObject<REQUEST>> requestRef;
6162
private final CompletableFuture<Void> future = new CompletableFuture<>();
63+
private final String requestString;
6264

63-
PendingServerRequest(ReferenceCountedObject<REQUEST> requestRef) {
65+
/**
 * Retains the given reference-counted request, stores it in an
 * {@link AtomicReference}, and remembers the supplied string form of the request.
 *
 * @param requestRef the reference-counted request; {@code retain()} is called on it here
 * @param requestString a string describing the request, returned later by
 *                      {@code getRequestString()}
 */
PendingServerRequest(ReferenceCountedObject<REQUEST> requestRef, String requestString) {
6466
requestRef.retain();
6567
this.requestRef = new AtomicReference<>(requestRef);
68+
this.requestString = requestString;
6669
}
6770

6871
REQUEST getRequest() {
@@ -71,6 +74,10 @@ REQUEST getRequest() {
7174
.orElse(null);
7275
}
7376

77+
/** @return the request string that was passed to the constructor */
String getRequestString() {
78+
return requestString;
79+
}
80+
7481
/** @return the future created together with this pending request */
CompletableFuture<Void> getFuture() {
7582
return future;
7683
}
@@ -104,8 +111,7 @@ String getName() {
104111

105112
/**
 * Returns the string form of the request currently held in {@code previousOnNext}.
 *
 * @return the stored request string, or {@code null} when {@code previousOnNext}
 *         holds no request
 */
private String getPreviousRequestString() {
106113
return Optional.ofNullable(previousOnNext.get())
107-
.map(PendingServerRequest::getRequest)
108-
.map(this::requestToString)
114+
.map(PendingServerRequest::getRequestString)
109115
.orElse(null);
110116
}
111117

@@ -177,7 +183,9 @@ public void onNext(REQUEST request) {
177183
return;
178184
}
179185

180-
final PendingServerRequest<REQUEST> current = new PendingServerRequest<>(requestRef);
186+
final PendingServerRequest<REQUEST> current
187+
= new PendingServerRequest<>(requestRef, requestToString(requestRef.get()));
188+
current.getFuture().whenComplete((r, e) -> current.release());
181189
final long callId = getCallId(current.getRequest());
182190
final boolean isHeartbeat = isHeartbeat(current.getRequest());
183191
final Optional<PendingServerRequest<REQUEST>> previous = Optional.ofNullable(previousOnNext.getAndSet(current));
@@ -243,15 +251,23 @@ private void releaseLast() {
243251
private final RaftServer server;
244252
private final boolean zeroCopyEnabled;
245253
private final ZeroCopyMessageMarshaller<AppendEntriesRequestProto> zeroCopyRequestMarshaller;
254+
private final ZeroCopyMessageMarshaller<InstallSnapshotRequestProto> zeroCopyInstallSnapshotMarshaller;
246255

247256
/**
 * Stores the collaborators and creates one zero-copy marshaller per request type
 * (appendEntries and installSnapshot), each wired to the given metrics object.
 *
 * @param idSupplier stored in {@code this.idSupplier}
 * @param server stored in {@code this.server}
 * @param zeroCopyEnabled stored in {@code this.zeroCopyEnabled}
 * @param zeroCopyMetrics source of the counting callbacks handed to both marshallers;
 *                        also receives the "unreleased" suppliers registered below
 */
GrpcServerProtocolService(Supplier<RaftPeerId> idSupplier, RaftServer server, boolean zeroCopyEnabled,
248257
ZeroCopyMetrics zeroCopyMetrics) {
249258
this.idSupplier = idSupplier;
250259
this.server = server;
251260
this.zeroCopyEnabled = zeroCopyEnabled;
252261
// appendEntries marshaller: zero-copy messages are counted via onZeroCopyAppendEntries.
this.zeroCopyRequestMarshaller = new ZeroCopyMessageMarshaller<>(AppendEntriesRequestProto.getDefaultInstance(),
253-
zeroCopyMetrics::onZeroCopyMessage, zeroCopyMetrics::onNonZeroCopyMessage, zeroCopyMetrics::onReleasedMessage);
262+
zeroCopyMetrics::onZeroCopyAppendEntries, zeroCopyMetrics::onNonZeroCopyMessage,
263+
zeroCopyMetrics::onReleasedMessage, zeroCopyMetrics.newMarshallerMetrics());
264+
// installSnapshot marshaller: same wiring, counted via onZeroCopyInstallSnapshot.
this.zeroCopyInstallSnapshotMarshaller = new ZeroCopyMessageMarshaller<>(
265+
InstallSnapshotRequestProto.getDefaultInstance(),
266+
zeroCopyMetrics::onZeroCopyInstallSnapshot, zeroCopyMetrics::onNonZeroCopyMessage,
267+
zeroCopyMetrics::onReleasedMessage, zeroCopyMetrics.newMarshallerMetrics());
254268
// Register each marshaller's unclosed count under its own name.
zeroCopyMetrics.addUnreleased("server_protocol", zeroCopyRequestMarshaller::getUnclosedCount);
269+
zeroCopyMetrics.addUnreleased("server_protocol_install_snapshot",
270+
zeroCopyInstallSnapshotMarshaller::getUnclosedCount);
255271
}
256272

257273
RaftPeerId getId() {
@@ -268,9 +284,16 @@ ServerServiceDefinition bindServiceWithZeroCopy() {
268284

269285
// Add appendEntries with zero copy marshaller.
270286
addMethodWithCustomMarshaller(orig, builder, getAppendEntriesMethod(), zeroCopyRequestMarshaller);
287+
// Add installSnapshot with zero copy marshaller for zero-copy counters/metrics.
288+
addMethodWithCustomMarshaller(orig, builder, getInstallSnapshotMethod(), zeroCopyInstallSnapshotMarshaller);
271289
// Add remaining methods as is.
290+
final String appendEntriesMethod = getAppendEntriesMethod().getFullMethodName();
291+
final String installSnapshotMethod = getInstallSnapshotMethod().getFullMethodName();
272292
orig.getMethods().stream().filter(
273-
x -> !x.getMethodDescriptor().getFullMethodName().equals(getAppendEntriesMethod().getFullMethodName())
293+
x -> {
294+
final String methodName = x.getMethodDescriptor().getFullMethodName();
295+
return !methodName.equals(appendEntriesMethod) && !methodName.equals(installSnapshotMethod);
296+
}
274297
).forEach(
275298
builder::addMethod
276299
);
@@ -365,6 +388,11 @@ CompletableFuture<InstallSnapshotReplyProto> process(InstallSnapshotRequestProto
365388
return CompletableFuture.completedFuture(server.installSnapshot(request));
366389
}
367390

391+
/**
 * Releases the given installSnapshot request by delegating to the
 * installSnapshot zero-copy marshaller.
 *
 * @param request the request to release
 */
@Override
392+
void release(InstallSnapshotRequestProto request) {
393+
zeroCopyInstallSnapshotMarshaller.release(request);
394+
}
395+
368396
@Override
369397
long getCallId(InstallSnapshotRequestProto request) {
370398
return request.getServerRequest().getCallId();

ratis-grpc/src/main/java/org/apache/ratis/grpc/util/ZeroCopyMessageMarshaller.java

Lines changed: 33 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,23 @@
5555
public class ZeroCopyMessageMarshaller<T extends MessageLite> implements PrototypeMarshaller<T> {
5656
static final Logger LOG = LoggerFactory.getLogger(ZeroCopyMessageMarshaller.class);
5757

58+
/**
 * Optional callbacks through which the marshaller reports zero-copy parse
 * statistics and the reason for a fallback. Every method has an empty default
 * body, so implementations override only what they need.
 */
public interface Metrics {
59+
/**
 * Called by {@code parseZeroCopy} after a message has been parsed from the detached stream.
 *
 * @param bytesSaved the value of {@code stream.available()} read before detaching
 * @param parseTimeNanos elapsed {@code System.nanoTime()} difference measured around the parse
 */
default void onZeroCopyParse(long bytesSaved, long parseTimeNanos) {
60+
}
61+
62+
/** Called when the stream is not an instance of {@code KnownLength}. */
default void onFallbackNotKnownLength() {
63+
}
64+
65+
/** Called when the stream is not an instance of {@code Detachable}. */
default void onFallbackNotDetachable() {
66+
}
67+
68+
/**
 * Called when the stream is not an instance of {@code HasByteBuffer}, or is one
 * but {@code byteBufferSupported()} returns false.
 */
default void onFallbackNotByteBuffer() {
69+
}
70+
}
71+
72+
// Shared Metrics instance that overrides nothing, so every callback is the empty default.
// Used by the constructors when no Metrics is supplied (or null is passed).
private static final Metrics NOOP_METRICS = new Metrics() {
73+
};
74+
5875
private final String name;
5976
private final Map<T, InputStream> unclosedStreams = Collections.synchronizedMap(new IdentityHashMap<>());
6077
private final Parser<T> parser;
@@ -63,13 +80,19 @@ public class ZeroCopyMessageMarshaller<T extends MessageLite> implements Prototy
6380
private final Consumer<T> zeroCopyCount;
6481
private final Consumer<T> nonZeroCopyCount;
6582
private final Consumer<T> releasedCount;
83+
private final Metrics metrics;
6684

6785
/**
 * Creates a marshaller that counts nothing: all three consumers are empty lambdas
 * and the metrics are {@code NOOP_METRICS}.
 *
 * @param defaultInstance the default instance of the message type
 */
public ZeroCopyMessageMarshaller(T defaultInstance) {
68-
this(defaultInstance, m -> {}, m -> {}, m -> {});
86+
this(defaultInstance, m -> {}, m -> {}, m -> {}, NOOP_METRICS);
6987
}
7088

7189
/**
 * Creates a marshaller with the given counting consumers and {@code NOOP_METRICS};
 * delegates to the five-argument constructor.
 *
 * @param defaultInstance the default instance of the message type
 * @param zeroCopyCount stored as the zero-copy consumer
 * @param nonZeroCopyCount stored as the non-zero-copy consumer
 * @param releasedCount stored as the released consumer
 */
public ZeroCopyMessageMarshaller(T defaultInstance, Consumer<T> zeroCopyCount, Consumer<T> nonZeroCopyCount,
7290
Consumer<T> releasedCount) {
91+
this(defaultInstance, zeroCopyCount, nonZeroCopyCount, releasedCount, NOOP_METRICS);
92+
}
93+
94+
public ZeroCopyMessageMarshaller(T defaultInstance, Consumer<T> zeroCopyCount, Consumer<T> nonZeroCopyCount,
95+
Consumer<T> releasedCount, Metrics metrics) {
7396
this.name = JavaUtils.getClassSimpleName(defaultInstance.getClass()) + "-Marshaller";
7497
@SuppressWarnings("unchecked")
7598
final Parser<T> p = (Parser<T>) defaultInstance.getParserForType();
@@ -79,6 +102,7 @@ public ZeroCopyMessageMarshaller(T defaultInstance, Consumer<T> zeroCopyCount, C
79102
this.zeroCopyCount = zeroCopyCount;
80103
this.nonZeroCopyCount = nonZeroCopyCount;
81104
this.releasedCount = releasedCount;
105+
this.metrics = metrics == null ? NOOP_METRICS : metrics;
82106
}
83107

84108
@Override
@@ -158,28 +182,36 @@ private List<ByteString> getByteStrings(InputStream detached, int exactSize) thr
158182
*/
159183
private T parseZeroCopy(InputStream stream) throws IOException {
160184
if (!(stream instanceof KnownLength)) {
185+
metrics.onFallbackNotKnownLength();
161186
LOG.debug("stream is not KnownLength: {}", stream.getClass());
162187
return null;
163188
}
164189
if (!(stream instanceof Detachable)) {
190+
metrics.onFallbackNotDetachable();
165191
LOG.debug("stream is not Detachable: {}", stream.getClass());
166192
return null;
167193
}
168194
if (!(stream instanceof HasByteBuffer)) {
195+
metrics.onFallbackNotByteBuffer();
169196
LOG.debug("stream is not HasByteBuffer: {}", stream.getClass());
170197
return null;
171198
}
172199
if (!((HasByteBuffer) stream).byteBufferSupported()) {
200+
metrics.onFallbackNotByteBuffer();
173201
LOG.debug("stream is HasByteBuffer but not byteBufferSupported: {}", stream.getClass());
174202
return null;
175203
}
176204

177205
final int exactSize = stream.available();
178206
InputStream detached = ((Detachable) stream).detach();
207+
// Measure only the zero-copy parse path (buffer extraction + parse); the detach() call above is not timed.
208+
final long startNanos = System.nanoTime();
179209
try {
180210
final List<ByteString> byteStrings = getByteStrings(detached, exactSize);
181211
final T message = parseFrom(byteStrings, exactSize);
182212

213+
metrics.onZeroCopyParse(exactSize, System.nanoTime() - startNanos);
214+
183215
final InputStream previous = unclosedStreams.put(message, detached);
184216
Preconditions.assertNull(previous, "previous");
185217

0 commit comments

Comments
 (0)