Skip to content

Commit a1d3eb5

Browse files
authored
feat: streaming transformer (numaproj#196)
Signed-off-by: Sidhant Kohli <[email protected]>
1 parent 6ad59bd commit a1d3eb5

File tree

12 files changed

+487
-229
lines changed

12 files changed

+487
-229
lines changed

pynumaflow/proto/sourcetransformer/transform.proto

+25-7
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,35 @@ service SourceTransform {
99
// SourceTransformFn applies a function to each request element.
1010
// In addition to the map function, SourceTransformFn also supports assigning a new event time to the response.
1111
// SourceTransformFn can be used only at source vertex by source data transformer.
12-
rpc SourceTransformFn(SourceTransformRequest) returns (SourceTransformResponse);
12+
rpc SourceTransformFn(stream SourceTransformRequest) returns (stream SourceTransformResponse);
1313

1414
// IsReady is the heartbeat endpoint for gRPC.
1515
rpc IsReady(google.protobuf.Empty) returns (ReadyResponse);
1616
}
1717

18+
/*
19+
* Handshake message between client and server to indicate the start of transmission.
20+
*/
21+
message Handshake {
22+
// Required field indicating the start of transmission.
23+
bool sot = 1;
24+
}
25+
1826
/**
1927
* SourceTransformRequest represents a request element.
2028
*/
2129
message SourceTransformRequest {
22-
repeated string keys = 1;
23-
bytes value = 2;
24-
google.protobuf.Timestamp event_time = 3;
25-
google.protobuf.Timestamp watermark = 4;
26-
map<string, string> headers = 5;
30+
message Request {
31+
repeated string keys = 1;
32+
bytes value = 2;
33+
google.protobuf.Timestamp event_time = 3;
34+
google.protobuf.Timestamp watermark = 4;
35+
map<string, string> headers = 5;
36+
// This ID is used to uniquely identify a transform request.
37+
string id = 6;
38+
}
39+
Request request = 1;
40+
optional Handshake handshake = 2;
2741
}
2842

2943
/**
@@ -37,11 +51,15 @@ message SourceTransformResponse {
3751
repeated string tags = 4;
3852
}
3953
repeated Result results = 1;
54+
// This ID is used to match each response to the request it corresponds to.
55+
string id = 2;
56+
// Handshake message between client and server to indicate the start of transmission.
57+
optional Handshake handshake = 3;
4058
}
4159

4260
/**
4361
* ReadyResponse is the health check result.
4462
*/
4563
message ReadyResponse {
4664
bool ready = 1;
47-
}
65+
}

pynumaflow/proto/sourcetransformer/transform_pb2.py

+19-15
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pynumaflow/proto/sourcetransformer/transform_pb2.pyi

+52-24
Original file line numberDiff line numberDiff line change
@@ -13,37 +13,58 @@ from typing import (
1313

1414
DESCRIPTOR: _descriptor.FileDescriptor
1515

16+
class Handshake(_message.Message):
17+
__slots__ = ("sot",)
18+
SOT_FIELD_NUMBER: _ClassVar[int]
19+
sot: bool
20+
def __init__(self, sot: bool = ...) -> None: ...
21+
1622
class SourceTransformRequest(_message.Message):
17-
__slots__ = ("keys", "value", "event_time", "watermark", "headers")
23+
__slots__ = ("request", "handshake")
24+
25+
class Request(_message.Message):
26+
__slots__ = ("keys", "value", "event_time", "watermark", "headers", "id")
1827

19-
class HeadersEntry(_message.Message):
20-
__slots__ = ("key", "value")
21-
KEY_FIELD_NUMBER: _ClassVar[int]
28+
class HeadersEntry(_message.Message):
29+
__slots__ = ("key", "value")
30+
KEY_FIELD_NUMBER: _ClassVar[int]
31+
VALUE_FIELD_NUMBER: _ClassVar[int]
32+
key: str
33+
value: str
34+
def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
35+
KEYS_FIELD_NUMBER: _ClassVar[int]
2236
VALUE_FIELD_NUMBER: _ClassVar[int]
23-
key: str
24-
value: str
25-
def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
26-
KEYS_FIELD_NUMBER: _ClassVar[int]
27-
VALUE_FIELD_NUMBER: _ClassVar[int]
28-
EVENT_TIME_FIELD_NUMBER: _ClassVar[int]
29-
WATERMARK_FIELD_NUMBER: _ClassVar[int]
30-
HEADERS_FIELD_NUMBER: _ClassVar[int]
31-
keys: _containers.RepeatedScalarFieldContainer[str]
32-
value: bytes
33-
event_time: _timestamp_pb2.Timestamp
34-
watermark: _timestamp_pb2.Timestamp
35-
headers: _containers.ScalarMap[str, str]
37+
EVENT_TIME_FIELD_NUMBER: _ClassVar[int]
38+
WATERMARK_FIELD_NUMBER: _ClassVar[int]
39+
HEADERS_FIELD_NUMBER: _ClassVar[int]
40+
ID_FIELD_NUMBER: _ClassVar[int]
41+
keys: _containers.RepeatedScalarFieldContainer[str]
42+
value: bytes
43+
event_time: _timestamp_pb2.Timestamp
44+
watermark: _timestamp_pb2.Timestamp
45+
headers: _containers.ScalarMap[str, str]
46+
id: str
47+
def __init__(
48+
self,
49+
keys: _Optional[_Iterable[str]] = ...,
50+
value: _Optional[bytes] = ...,
51+
event_time: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
52+
watermark: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
53+
headers: _Optional[_Mapping[str, str]] = ...,
54+
id: _Optional[str] = ...,
55+
) -> None: ...
56+
REQUEST_FIELD_NUMBER: _ClassVar[int]
57+
HANDSHAKE_FIELD_NUMBER: _ClassVar[int]
58+
request: SourceTransformRequest.Request
59+
handshake: Handshake
3660
def __init__(
3761
self,
38-
keys: _Optional[_Iterable[str]] = ...,
39-
value: _Optional[bytes] = ...,
40-
event_time: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
41-
watermark: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
42-
headers: _Optional[_Mapping[str, str]] = ...,
62+
request: _Optional[_Union[SourceTransformRequest.Request, _Mapping]] = ...,
63+
handshake: _Optional[_Union[Handshake, _Mapping]] = ...,
4364
) -> None: ...
4465

4566
class SourceTransformResponse(_message.Message):
46-
__slots__ = ("results",)
67+
__slots__ = ("results", "id", "handshake")
4768

4869
class Result(_message.Message):
4970
__slots__ = ("keys", "value", "event_time", "tags")
@@ -63,9 +84,16 @@ class SourceTransformResponse(_message.Message):
6384
tags: _Optional[_Iterable[str]] = ...,
6485
) -> None: ...
6586
RESULTS_FIELD_NUMBER: _ClassVar[int]
87+
ID_FIELD_NUMBER: _ClassVar[int]
88+
HANDSHAKE_FIELD_NUMBER: _ClassVar[int]
6689
results: _containers.RepeatedCompositeFieldContainer[SourceTransformResponse.Result]
90+
id: str
91+
handshake: Handshake
6792
def __init__(
68-
self, results: _Optional[_Iterable[_Union[SourceTransformResponse.Result, _Mapping]]] = ...
93+
self,
94+
results: _Optional[_Iterable[_Union[SourceTransformResponse.Result, _Mapping]]] = ...,
95+
id: _Optional[str] = ...,
96+
handshake: _Optional[_Union[Handshake, _Mapping]] = ...,
6997
) -> None: ...
7098

7199
class ReadyResponse(_message.Message):

pynumaflow/proto/sourcetransformer/transform_pb2_grpc.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def __init__(self, channel):
1515
Args:
1616
channel: A grpc.Channel.
1717
"""
18-
self.SourceTransformFn = channel.unary_unary(
18+
self.SourceTransformFn = channel.stream_stream(
1919
"/sourcetransformer.v1.SourceTransform/SourceTransformFn",
2020
request_serializer=transform__pb2.SourceTransformRequest.SerializeToString,
2121
response_deserializer=transform__pb2.SourceTransformResponse.FromString,
@@ -30,7 +30,7 @@ def __init__(self, channel):
3030
class SourceTransformServicer(object):
3131
"""Missing associated documentation comment in .proto file."""
3232

33-
def SourceTransformFn(self, request, context):
33+
def SourceTransformFn(self, request_iterator, context):
3434
"""SourceTransformFn applies a function to each request element.
3535
In addition to map function, SourceTransformFn also supports assigning a new event time to response.
3636
SourceTransformFn can be used only at source vertex by source data transformer.
@@ -48,7 +48,7 @@ def IsReady(self, request, context):
4848

4949
def add_SourceTransformServicer_to_server(servicer, server):
5050
rpc_method_handlers = {
51-
"SourceTransformFn": grpc.unary_unary_rpc_method_handler(
51+
"SourceTransformFn": grpc.stream_stream_rpc_method_handler(
5252
servicer.SourceTransformFn,
5353
request_deserializer=transform__pb2.SourceTransformRequest.FromString,
5454
response_serializer=transform__pb2.SourceTransformResponse.SerializeToString,
@@ -71,7 +71,7 @@ class SourceTransform(object):
7171

7272
@staticmethod
7373
def SourceTransformFn(
74-
request,
74+
request_iterator,
7575
target,
7676
options=(),
7777
channel_credentials=None,
@@ -82,8 +82,8 @@ def SourceTransformFn(
8282
timeout=None,
8383
metadata=None,
8484
):
85-
return grpc.experimental.unary_unary(
86-
request,
85+
return grpc.experimental.stream_stream(
86+
request_iterator,
8787
target,
8888
"/sourcetransformer.v1.SourceTransform/SourceTransformFn",
8989
transform__pb2.SourceTransformRequest.SerializeToString,

pynumaflow/sourcetransformer/multiproc_server.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from pynumaflow.info.types import ServerInfo, MINIMUM_NUMAFLOW_VERSION, ContainerType
2-
from pynumaflow.sourcetransformer.servicer.server import SourceTransformServicer
2+
from pynumaflow.sourcetransformer.servicer._servicer import SourceTransformServicer
33

44
from pynumaflow.shared.server import start_multiproc_server
55

pynumaflow/sourcetransformer/server.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pynumaflow.shared import NumaflowServer
1212
from pynumaflow.shared.server import sync_server_start
1313
from pynumaflow.sourcetransformer._dtypes import SourceTransformCallable
14-
from pynumaflow.sourcetransformer.servicer.server import SourceTransformServicer
14+
from pynumaflow.sourcetransformer.servicer._servicer import SourceTransformServicer
1515

1616

1717
class SourceTransformServer(NumaflowServer):

0 commit comments

Comments
 (0)