@@ -175,26 +175,37 @@ def create_grpc_server(
175175 renderer_service : RendererService ,
176176 tcp_port : str = "" ,
177177) -> grpc .aio .Server :
178- # Performance optimizations
179- options = [
180- ("grpc.max_send_message_length" , 100 * 1024 * 1024 ), # 100MB
181- ("grpc.max_receive_message_length" , 100 * 1024 * 1024 ), # 100MB
182- ("grpc.keepalive_time_ms" , 7200000 ), # 2 hours
183- ("grpc.keepalive_timeout_ms" , 20000 ), # 20 seconds
184- ("grpc.keepalive_permit_without_calls" , 1 ),
185- ("grpc.http2.max_pings_without_data" , 0 ),
186- (
187- "grpc.http2.min_time_between_pings_ms" ,
188- 10000 ,
189- ), # 10s - tolerate frequent pings from Envoy/Istio sidecars
190- ("grpc.http2.min_ping_interval_without_data_ms" , 10000 ),
191- ("grpc.http2.max_frame_size" , 8192 ),
192- ]
193-
194- server = grpc .aio .server (options = options )
178+ """Create an async gRPC server.
179+
180+ Args:
181+ tokenizer_service: The tokenizer service implementation
182+ uds_socket_path: Path to Unix Domain Socket
183+ renderer_service: The renderer service wrapping OpenAIServingRender
184+ tcp_port: TCP port for testing only (leave empty for production)
185+ """
186+ server = grpc .aio .server (
187+ options = [
188+ ("grpc.max_send_message_length" , 100 * 1024 * 1024 ), # 100MB
189+ ("grpc.max_receive_message_length" , 100 * 1024 * 1024 ), # 100MB
190+ ("grpc.keepalive_time_ms" , 7200000 ), # 2 hours
191+ ("grpc.keepalive_timeout_ms" , 20000 ), # 20 seconds
192+ ("grpc.keepalive_permit_without_calls" , 1 ),
193+ ("grpc.http2.max_pings_without_data" , 0 ),
194+ (
195+ "grpc.http2.min_time_between_pings_ms" ,
196+ 10000 ,
197+ ), # 10s - tolerate frequent pings from Envoy/Istio sidecars
198+ ("grpc.http2.min_ping_interval_without_data_ms" , 10000 ),
199+ ("grpc.http2.max_frame_size" , 8192 ),
200+ ]
201+ )
195202 servicer = TokenizationServiceServicer (tokenizer_service , renderer_service )
203+
204+ # Register service
196205 tokenizer_pb2_grpc .add_TokenizationServiceServicer_to_server (servicer , server )
197206
207+ # Enable reflection for grpcurl and other tools (only if explicitly enabled)
208+ # Reflection increases the exposed surface area, so it's disabled by default
198209 enable_reflection = os .getenv ("ENABLE_GRPC_REFLECTION" , "" )
199210 if enable_reflection :
200211 SERVICE_NAMES = (
@@ -205,12 +216,13 @@ def create_grpc_server(
205216 logging .info ("gRPC reflection enabled" )
206217 else :
207218 logging .info (
208- "gRPC reflection disabled (set ENABLE_GRPC_REFLECTION=1 to enable)"
219+ "gRPC reflection disabled (set ` ENABLE_GRPC_REFLECTION=1` to enable)"
209220 )
210221
211222 server .add_insecure_port (f"unix://{ uds_socket_path } " )
212223 logging .info (f"gRPC server configured on { uds_socket_path } " )
213224
225+ # Optionally bind to TCP port (FOR TESTING ONLY)
214226 if tcp_port :
215227 server .add_insecure_port (f"0.0.0.0:{ tcp_port } " )
216228 logging .warning (f"TCP mode enabled on port { tcp_port } - FOR TESTING ONLY" )
0 commit comments