Skip to content

Commit d1cdb8f

Browse files
authored
chore: formalize gRPC errors in case of UDF exceptions (#166)
Signed-off-by: adarsh0728 <[email protected]>
1 parent 097467c commit d1cdb8f

File tree

12 files changed

+164
-47
lines changed

12 files changed

+164
-47
lines changed

src/main/java/io/numaproj/numaflow/batchmapper/Service.java

+14-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
package io.numaproj.numaflow.batchmapper;
22

3+
import com.google.protobuf.Any;
34
import com.google.protobuf.ByteString;
45
import com.google.protobuf.Empty;
6+
import com.google.rpc.Code;
7+
import com.google.rpc.DebugInfo;
58
import io.grpc.Status;
9+
import io.grpc.protobuf.StatusProto;
610
import io.grpc.stub.StreamObserver;
711
import io.numaproj.numaflow.map.v1.MapGrpc;
812
import io.numaproj.numaflow.map.v1.MapOuterClass;
13+
import io.numaproj.numaflow.shared.ExceptionUtils;
914
import lombok.AllArgsConstructor;
1015
import lombok.extern.slf4j.Slf4j;
1116

@@ -98,10 +103,15 @@ public void onNext(MapOuterClass.MapRequest mapRequest) {
98103
} catch (Exception e) {
99104
log.error("Encountered an error in batch map onNext", e);
100105
shutdownSignal.completeExceptionally(e);
101-
responseObserver.onError(Status.INTERNAL
102-
.withDescription(e.getMessage())
103-
.withCause(e)
104-
.asException());
106+
// Build gRPC Status
107+
com.google.rpc.Status status = com.google.rpc.Status.newBuilder()
108+
.setCode(Code.INTERNAL.getNumber())
109+
.setMessage(ExceptionUtils.ERR_BATCH_MAP_EXCEPTION + ": " + (e.getMessage() != null ? e.getMessage() : ""))
110+
.addDetails(Any.pack(DebugInfo.newBuilder()
111+
.setDetail(ExceptionUtils.getStackTrace(e))
112+
.build()))
113+
.build();
114+
responseObserver.onError(StatusProto.toStatusRuntimeException(status));
105115
}
106116
}
107117

src/main/java/io/numaproj/numaflow/mapper/MapSupervisorActor.java

+14-4
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,13 @@
99
import akka.japi.pf.DeciderBuilder;
1010
import akka.japi.pf.ReceiveBuilder;
1111
import io.grpc.Status;
12+
import com.google.protobuf.Any;
13+
import com.google.rpc.Code;
14+
import com.google.rpc.DebugInfo;
15+
import io.grpc.protobuf.StatusProto;
1216
import io.grpc.stub.StreamObserver;
1317
import io.numaproj.numaflow.map.v1.MapOuterClass;
18+
import io.numaproj.numaflow.shared.ExceptionUtils;
1419
import lombok.extern.slf4j.Slf4j;
1520

1621
import java.util.Optional;
@@ -106,10 +111,15 @@ private void handleFailure(Exception e) {
106111
userException = e;
107112
// only send the very first exception to the client
108113
// one exception should trigger a container restart
109-
responseObserver.onError(Status.INTERNAL
110-
.withDescription(e.getMessage())
111-
.withCause(e)
112-
.asException());
114+
// Build gRPC Status
115+
com.google.rpc.Status status = com.google.rpc.Status.newBuilder()
116+
.setCode(Code.INTERNAL.getNumber())
117+
.setMessage(ExceptionUtils.ERR_MAP_EXCEPTION + ": " + (e.getMessage() != null ? e.getMessage() : ""))
118+
.addDetails(Any.pack(DebugInfo.newBuilder()
119+
.setDetail(ExceptionUtils.getStackTrace(e))
120+
.build()))
121+
.build();
122+
responseObserver.onError(StatusProto.toStatusRuntimeException(status));
113123
}
114124
activeMapperCount--;
115125
}
src/main/java/io/numaproj/numaflow/shared/ExceptionUtils.java

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package io.numaproj.numaflow.shared;
2+
3+
import java.io.PrintWriter;
4+
import java.io.StringWriter;
5+
6+
public class ExceptionUtils {
7+
/**
8+
* Formalized exception error strings, one per UDF kind
9+
*/
10+
public static final String ERR_SOURCE_EXCEPTION = "UDF_EXECUTION_ERROR(source)";
11+
public static final String ERR_TRANSFORMER_EXCEPTION = "UDF_EXECUTION_ERROR(transformer)";
12+
public static final String ERR_SINK_EXCEPTION = "UDF_EXECUTION_ERROR(sink)";
13+
public static final String ERR_MAP_STREAM_EXCEPTION = "UDF_EXECUTION_ERROR(mapstream)";
14+
public static final String ERR_MAP_EXCEPTION = "UDF_EXECUTION_ERROR(map)";
15+
public static final String ERR_BATCH_MAP_EXCEPTION = "UDF_EXECUTION_ERROR(batchmap)";
16+
17+
/**
18+
* Converts the stack trace of a throwable into a String.
19+
*
20+
* @param t the throwable to extract the stack trace from; may be {@code null}
21+
* @return the stack trace as a String, or "No exception provided." when {@code t} is {@code null}
22+
*/
23+
public static String getStackTrace(Throwable t) {
24+
if (t == null) {
25+
return "No exception provided.";
26+
}
27+
StringWriter sw = new StringWriter();
28+
t.printStackTrace(new PrintWriter(sw));
29+
return sw.toString();
30+
}
31+
}

src/main/java/io/numaproj/numaflow/sideinput/Server.java

+12-8
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public Server(SideInputRetriever sideInputRetriever) {
3232
/**
3333
* constructor to create gRPC server with gRPC config.
3434
*
35-
* @param grpcConfig to configure the max message size for grpc
35+
* @param grpcConfig to configure the max message size for grpc
3636
* @param sideInputRetriever to retrieve the side input
3737
*/
3838
public Server(SideInputRetriever sideInputRetriever, GRPCConfig grpcConfig) {
@@ -41,7 +41,8 @@ public Server(SideInputRetriever sideInputRetriever, GRPCConfig grpcConfig) {
4141
}
4242

4343
@VisibleForTesting
44-
protected Server(GRPCConfig grpcConfig, SideInputRetriever service, ServerInterceptor interceptor, String serverName) {
44+
protected Server(GRPCConfig grpcConfig, SideInputRetriever service, ServerInterceptor interceptor,
45+
String serverName) {
4546
this.grpcConfig = grpcConfig;
4647
this.server = new GrpcServerWrapper(
4748
interceptor,
@@ -67,8 +68,7 @@ public void start() throws Exception {
6768

6869
log.info(
6970
"server started, listening on {}",
70-
this.grpcConfig.isLocal() ?
71-
"localhost:" + this.grpcConfig.getPort() : this.grpcConfig.getSocketPath());
71+
this.grpcConfig.isLocal() ? "localhost:" + this.grpcConfig.getPort() : this.grpcConfig.getSocketPath());
7272

7373
// register shutdown hook to gracefully shut down the server
7474
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
@@ -83,11 +83,14 @@ public void start() throws Exception {
8383
}
8484

8585
/**
86-
* Blocks until the server has terminated. If the server is already terminated, this method
87-
* will return immediately. If the server is not yet terminated, this method will block the
86+
* Blocks until the server has terminated. If the server is already terminated,
87+
* this method
88+
* will return immediately. If the server is not yet terminated, this method
89+
* will block the
8890
* calling thread until the server has terminated.
8991
*
90-
* @throws InterruptedException if the current thread is interrupted while waiting
92+
* @throws InterruptedException if the current thread is interrupted while
93+
* waiting
9194
*/
9295
public void awaitTermination() throws InterruptedException {
9396
log.info("side input server is waiting for termination");
@@ -96,7 +99,8 @@ public void awaitTermination() throws InterruptedException {
9699
}
97100

98101
/**
99-
* Stop serving requests and shutdown resources. Await termination on the main thread since the
102+
* Stop serving requests and shutdown resources. Await termination on the main
103+
* thread since the
100104
* grpc library uses daemon threads.
101105
*
102106
* @throws InterruptedException if shutdown is interrupted

src/main/java/io/numaproj/numaflow/sideinput/Service.java

+4-7
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
package io.numaproj.numaflow.sideinput;
22

3-
import com.google.protobuf.ByteString;
43
import com.google.protobuf.Empty;
54
import io.grpc.stub.StreamObserver;
5+
import com.google.protobuf.ByteString;
66
import io.numaproj.numaflow.sideinput.v1.SideInputGrpc;
77
import io.numaproj.numaflow.sideinput.v1.Sideinput;
88
import lombok.AllArgsConstructor;
99
import lombok.extern.slf4j.Slf4j;
1010

11-
1211
@Slf4j
1312
@AllArgsConstructor
1413
class Service extends SideInputGrpc.SideInputImplBase {
@@ -29,11 +28,8 @@ public void retrieveSideInput(
2928
responseObserver);
3029
return;
3130
}
32-
33-
3431
// process request
3532
Message message = sideInputRetriever.retrieveSideInput();
36-
3733
// set response
3834
responseObserver.onNext(buildResponse(message));
3935
responseObserver.onCompleted();
@@ -50,8 +46,9 @@ public void isReady(Empty request, StreamObserver<Sideinput.ReadyResponse> respo
5046

5147
private Sideinput.SideInputResponse buildResponse(Message message) {
5248
return Sideinput.SideInputResponse.newBuilder()
53-
.setValue(message.getValue() == null ? ByteString.EMPTY : ByteString.copyFrom(
54-
message.getValue()))
49+
.setValue(message.getValue() == null ? ByteString.EMPTY
50+
: ByteString.copyFrom(
51+
message.getValue()))
5552
.setNoBroadcast(message.isNoBroadcast())
5653
.build();
5754
}

src/main/java/io/numaproj/numaflow/sinker/Service.java

+14
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
package io.numaproj.numaflow.sinker;
22

3+
import com.google.protobuf.Any;
34
import com.google.protobuf.Empty;
5+
import com.google.rpc.Code;
6+
import com.google.rpc.DebugInfo;
47
import io.grpc.Status;
8+
import io.grpc.protobuf.StatusProto;
59
import io.grpc.stub.StreamObserver;
10+
import io.numaproj.numaflow.shared.ExceptionUtils;
611
import io.numaproj.numaflow.sink.v1.SinkGrpc;
712
import io.numaproj.numaflow.sink.v1.SinkOuterClass;
813
import lombok.AllArgsConstructor;
@@ -100,6 +105,15 @@ public void onNext(SinkOuterClass.SinkRequest request) {
100105
responseObserver.onError(Status.INTERNAL
101106
.withDescription(e.getMessage())
102107
.asException());
108+
// Build gRPC Status
109+
com.google.rpc.Status status = com.google.rpc.Status.newBuilder()
110+
.setCode(Code.INTERNAL.getNumber())
111+
.setMessage(ExceptionUtils.ERR_SINK_EXCEPTION + ": " + (e.getMessage() != null ? e.getMessage() : ""))
112+
.addDetails(Any.pack(DebugInfo.newBuilder()
113+
.setDetail(ExceptionUtils.getStackTrace(e))
114+
.build()))
115+
.build();
116+
responseObserver.onError(StatusProto.toStatusRuntimeException(status));
103117
}
104118
}
105119

src/main/java/io/numaproj/numaflow/sourcer/Service.java

+20-9
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
package io.numaproj.numaflow.sourcer;
22

3+
import com.google.protobuf.Any;
34
import com.google.protobuf.Empty;
5+
import com.google.rpc.Code;
6+
import com.google.rpc.DebugInfo;
47
import io.grpc.Status;
8+
import io.grpc.protobuf.StatusProto;
59
import io.grpc.stub.StreamObserver;
10+
import io.numaproj.numaflow.shared.ExceptionUtils;
611
import io.numaproj.numaflow.source.v1.SourceGrpc;
712
import io.numaproj.numaflow.source.v1.SourceOuterClass;
813
import lombok.AllArgsConstructor;
@@ -15,7 +20,6 @@
1520

1621
import static io.numaproj.numaflow.source.v1.SourceGrpc.getPendingFnMethod;
1722

18-
1923
/**
2024
* Implementation of the gRPC service for the sourcer.
2125
*/
@@ -31,7 +35,8 @@ class Service extends SourceGrpc.SourceImplBase {
3135
* @param responseObserver the response observer
3236
*/
3337
@Override
34-
public StreamObserver<SourceOuterClass.ReadRequest> readFn(final StreamObserver<SourceOuterClass.ReadResponse> responseObserver) {
38+
public StreamObserver<SourceOuterClass.ReadRequest> readFn(
39+
final StreamObserver<SourceOuterClass.ReadResponse> responseObserver) {
3540
return new StreamObserver<>() {
3641
private boolean handshakeDone = false;
3742

@@ -80,10 +85,15 @@ public void onNext(SourceOuterClass.ReadRequest request) {
8085
} catch (Exception e) {
8186
log.error("Encountered error in readFn onNext", e);
8287
shutdownSignal.completeExceptionally(e);
83-
responseObserver.onError(Status.INTERNAL
84-
.withDescription(e.getMessage())
85-
.withCause(e)
86-
.asException());
88+
// Build gRPC Status
89+
com.google.rpc.Status status = com.google.rpc.Status.newBuilder()
90+
.setCode(Code.INTERNAL.getNumber())
91+
.setMessage(ExceptionUtils.ERR_SOURCE_EXCEPTION + ": " + (e.getMessage() != null ? e.getMessage() : ""))
92+
.addDetails(Any.pack(DebugInfo.newBuilder()
93+
.setDetail(ExceptionUtils.getStackTrace(e))
94+
.build()))
95+
.build();
96+
responseObserver.onError(StatusProto.toStatusRuntimeException(status));
8797
}
8898
}
8999

@@ -201,7 +211,8 @@ public void pendingFn(
201211
SourceOuterClass.PendingResponse.Result
202212
.newBuilder()
203213
.setCount(this.sourcer.getPending())
204-
.build()).build());
214+
.build())
215+
.build());
205216
responseObserver.onCompleted();
206217
}
207218

@@ -236,8 +247,8 @@ public void partitionsFn(
236247
responseObserver.onNext(SourceOuterClass.PartitionsResponse.newBuilder()
237248
.setResult(
238249
SourceOuterClass.PartitionsResponse.Result.newBuilder()
239-
.addAllPartitions(partitions)).
240-
build());
250+
.addAllPartitions(partitions))
251+
.build());
241252
responseObserver.onCompleted();
242253
}
243254
}

src/main/java/io/numaproj/numaflow/sourcetransformer/TransformSupervisorActor.java

+17-7
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,13 @@
88
import akka.actor.SupervisorStrategy;
99
import akka.japi.pf.DeciderBuilder;
1010
import akka.japi.pf.ReceiveBuilder;
11-
import io.grpc.Status;
1211
import io.grpc.stub.StreamObserver;
12+
import io.grpc.Status;
13+
import com.google.protobuf.Any;
14+
import com.google.rpc.Code;
15+
import com.google.rpc.DebugInfo;
16+
import io.grpc.protobuf.StatusProto;
17+
import io.numaproj.numaflow.shared.ExceptionUtils;
1318
import io.numaproj.numaflow.sourcetransformer.v1.Sourcetransformer;
1419
import lombok.extern.slf4j.Slf4j;
1520

@@ -144,10 +149,16 @@ private void handleFailure(Exception e) {
144149
userException = e;
145150
// only send the very first exception to the client
146151
// one exception should trigger a container restart
147-
responseObserver.onError(Status.INTERNAL
148-
.withDescription(e.getMessage())
149-
.withCause(e)
150-
.asException());
152+
153+
// Build gRPC Status
154+
com.google.rpc.Status status = com.google.rpc.Status.newBuilder()
155+
.setCode(Code.INTERNAL.getNumber())
156+
.setMessage(ExceptionUtils.ERR_TRANSFORMER_EXCEPTION + ": " + (e.getMessage() != null ? e.getMessage() : ""))
157+
.addDetails(Any.pack(DebugInfo.newBuilder()
158+
.setDetail(ExceptionUtils.getStackTrace(e))
159+
.build()))
160+
.build();
161+
responseObserver.onError(StatusProto.toStatusRuntimeException(status));
151162
}
152163
activeTransformersCount--;
153164
}
@@ -217,7 +228,6 @@ public SupervisorStrategy supervisorStrategy() {
217228
.asException());
218229
return SupervisorStrategy.stop();
219230
})
220-
.build()
221-
);
231+
.build());
222232
}
223233
}

src/test/java/io/numaproj/numaflow/batchmapper/ServerErrTest.java

+7-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import java.util.concurrent.ExecutionException;
1818

1919
import static org.junit.Assert.assertEquals;
20+
import static org.junit.Assert.assertNotNull;
21+
import static org.junit.Assert.assertTrue;
2022
import static org.junit.Assert.fail;
2123

2224
public class ServerErrTest {
@@ -85,9 +87,11 @@ public void testErrorFromUDF() {
8587
outputStreamObserver.done.get();
8688
fail("Expected exception not thrown");
8789
} catch (InterruptedException | ExecutionException e) {
88-
assertEquals(
89-
"INTERNAL: java.lang.RuntimeException: unknown exception",
90-
e.getCause().getMessage());
90+
String expectedSubstring = "UDF_EXECUTION_ERROR(batchmap)";
91+
String actualMessage = e.getMessage();
92+
assertNotNull("Error message should not be null", actualMessage);
93+
assertTrue("Expected substring '" + expectedSubstring + "' not found in error message: " + actualMessage,
94+
actualMessage.contains(expectedSubstring));
9195
}
9296
}
9397

src/test/java/io/numaproj/numaflow/mapper/ServerErrTest.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ public void testMapperFailure() {
127127
fail("Expected exception not thrown");
128128
} catch (Exception e) {
129129
assertEquals(
130-
"io.grpc.StatusRuntimeException: INTERNAL: unknown exception",
130+
"io.grpc.StatusRuntimeException: INTERNAL: UDF_EXECUTION_ERROR(map): unknown exception",
131131
e.getMessage());
132132
}
133133
}

0 commit comments

Comments
 (0)