Skip to content

Commit 4114e4b

Browse files
committed
clean diff
Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
1 parent 82ba8bf commit 4114e4b

File tree

1 file changed

+30
-18
lines changed

1 file changed

+30
-18
lines changed

services/uds_tokenizer/tokenizer_grpc_service.py

Lines changed: 30 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -175,26 +175,37 @@ def create_grpc_server(
175 175
renderer_service: RendererService,
176 176
tcp_port: str = "",
177 177
) -> grpc.aio.Server:
178-
# Performance optimizations
179-
options = [
180-
("grpc.max_send_message_length", 100 * 1024 * 1024), # 100MB
181-
("grpc.max_receive_message_length", 100 * 1024 * 1024), # 100MB
182-
("grpc.keepalive_time_ms", 7200000), # 2 hours
183-
("grpc.keepalive_timeout_ms", 20000), # 20 seconds
184-
("grpc.keepalive_permit_without_calls", 1),
185-
("grpc.http2.max_pings_without_data", 0),
186-
(
187-
"grpc.http2.min_time_between_pings_ms",
188-
10000,
189-
), # 10s - tolerate frequent pings from Envoy/Istio sidecars
190-
("grpc.http2.min_ping_interval_without_data_ms", 10000),
191-
("grpc.http2.max_frame_size", 8192),
192-
]
193-
194-
server = grpc.aio.server(options=options)
178+
"""Create an async gRPC server.
179+
180+
Args:
181+
tokenizer_service: The tokenizer service implementation
182+
uds_socket_path: Path to Unix Domain Socket
183+
renderer_service: The renderer service wrapping OpenAIServingRender
184+
tcp_port: TCP port for testing only (leave empty for production)
185+
"""
186+
server = grpc.aio.server(
187+
options=[
188+
("grpc.max_send_message_length", 100 * 1024 * 1024), # 100MB
189+
("grpc.max_receive_message_length", 100 * 1024 * 1024), # 100MB
190+
("grpc.keepalive_time_ms", 7200000), # 2 hours
191+
("grpc.keepalive_timeout_ms", 20000), # 20 seconds
192+
("grpc.keepalive_permit_without_calls", 1),
193+
("grpc.http2.max_pings_without_data", 0),
194+
(
195+
"grpc.http2.min_time_between_pings_ms",
196+
10000,
197+
), # 10s - tolerate frequent pings from Envoy/Istio sidecars
198+
("grpc.http2.min_ping_interval_without_data_ms", 10000),
199+
("grpc.http2.max_frame_size", 8192),
200+
]
201+
)
195 202
servicer = TokenizationServiceServicer(tokenizer_service, renderer_service)
203+
204+
# Register service
196 205
tokenizer_pb2_grpc.add_TokenizationServiceServicer_to_server(servicer, server)
197 206

207+
# Enable reflection for grpcurl and other tools (only if explicitly enabled)
208+
# Reflection increases the exposed surface area, so it's disabled by default
198 209
enable_reflection = os.getenv("ENABLE_GRPC_REFLECTION", "")
199 210
if enable_reflection:
200 211
SERVICE_NAMES = (
@@ -205,12 +216,13 @@ def create_grpc_server(
205 216
logging.info("gRPC reflection enabled")
206 217
else:
207 218
logging.info(
208-
"gRPC reflection disabled (set ENABLE_GRPC_REFLECTION=1 to enable)"
219+
"gRPC reflection disabled (set `ENABLE_GRPC_REFLECTION=1` to enable)"
209 220
)
210 221

211 222
server.add_insecure_port(f"unix://{uds_socket_path}")
212 223
logging.info(f"gRPC server configured on {uds_socket_path}")
213 224

225+
# Optionally bind to TCP port (FOR TESTING ONLY)
214 226
if tcp_port:
215 227
server.add_insecure_port(f"0.0.0.0:{tcp_port}")
216 228
logging.warning(f"TCP mode enabled on port {tcp_port} - FOR TESTING ONLY")

0 commit comments

Comments
 (0)