Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ci/ray_ci/serve_hap_test_names.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
//python/ray/serve/tests:test_api
//python/ray/serve/tests:test_backpressure
//python/ray/serve/tests:test_backpressure_grpc
//python/ray/serve/tests:test_batching
//python/ray/serve/tests:test_cli
//python/ray/serve/tests:test_cli_2
//python/ray/serve/tests:test_cli_4
//python/ray/serve/tests:test_controller_recovery
//python/ray/serve/tests:test_deploy
//python/ray/serve/tests:test_deploy_2
Expand All @@ -17,6 +19,7 @@
//python/ray/serve/tests:test_fastapi
//python/ray/serve/tests:test_gcs_failure
//python/ray/serve/tests:test_gradio
//python/ray/serve/tests:test_grpc
//python/ray/serve/tests:test_grpc_e2e
//python/ray/serve/tests:test_grpc_replica_wrapper
//python/ray/serve/tests:test_handle
Expand Down
16 changes: 16 additions & 0 deletions python/ray/serve/_private/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,9 +876,25 @@
"RAY_SERVE_HAPROXY_INGRESS_REQUEST_ROUTER_BUFSIZE", 262144
)

# HAProxy tuning flags
RAY_SERVE_HAPROXY_TUNE_BUFSIZE = get_env_int(
"RAY_SERVE_HAPROXY_TUNE_BUFSIZE", 16384 # 16KB
)
RAY_SERVE_HAPROXY_H2_MAX_FRAME_SIZE = get_env_int(
"RAY_SERVE_HAPROXY_H2_MAX_FRAME_SIZE", 1024 * 16
) # 16KB
RAY_SERVE_HAPROXY_H2_BE_INITIAL_WINDOW_SIZE = get_env_int(
"RAY_SERVE_HAPROXY_H2_BE_INITIAL_WINDOW_SIZE", 1024 * 64
) # 64KB
RAY_SERVE_HAPROXY_H2_BE_MAX_CONCURRENT_STREAMS = get_env_int(
"RAY_SERVE_HAPROXY_H2_BE_MAX_CONCURRENT_STREAMS", 100
)
RAY_SERVE_HAPROXY_H2_FE_INITIAL_WINDOW_SIZE = get_env_int(
"RAY_SERVE_HAPROXY_H2_FE_INITIAL_WINDOW_SIZE", 1024 * 64
) # 64KB
RAY_SERVE_HAPROXY_H2_FE_MAX_CONCURRENT_STREAMS = get_env_int(
"RAY_SERVE_HAPROXY_H2_FE_MAX_CONCURRENT_STREAMS", 100
)

# Escape hatch: when true, HAProxy forwards the (possibly truncated) request
# body to /internal/route and the router reads it. Off by default because for
Expand Down
290 changes: 249 additions & 41 deletions python/ray/serve/_private/haproxy.py

Large diffs are not rendered by default.

131 changes: 131 additions & 0 deletions python/ray/serve/_private/haproxy_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@
{%- endif %}
"""

# Same shape as HAPROXY_HEALTHZ_RULES_TEMPLATE, but emits gRPC trailers-only
# responses. The OK / UNAVAILABLE distinction maps to grpc-status 0 / 14
# (gRPC clients map HTTP 503 to UNAVAILABLE, which is why every shape here
# uses HTTP 200 and signals state through grpc-status).
HAPROXY_GRPC_HEALTHZ_RULES_TEMPLATE = """ # Health check endpoint (gRPC `Healthz`)
acl is_healthz path /ray.serve.RayServeAPIService/Healthz
# Suppress logging for health checks
http-request set-log-level silent if is_healthz
{%- if not health_info.healthy %}
# Override: force health checks to fail (used by drain/disable)
http-request return status 200 content-type application/grpc hdr grpc-status 14 hdr grpc-message "{{ health_info.health_message }}" if is_healthz
{%- elif backends %}
# OK if any backend has at least one server UP
{%- for backend in backends %}
acl backend_{{ backend.name or 'unknown' }}_server_up nbsrv({{ backend.name or 'unknown' }}) ge 1
{%- endfor %}
# Any backend with a server UP passes the health check (OR logic)
{%- for backend in backends %}
http-request return status 200 content-type application/grpc hdr grpc-status 0 if is_healthz backend_{{ backend.name or 'unknown' }}_server_up
{%- endfor %}
http-request return status 200 content-type application/grpc hdr grpc-status 14 hdr grpc-message "Service Unavailable" if is_healthz
{%- endif %}
Comment thread
akyang-anyscale marked this conversation as resolved.
"""

HAPROXY_CONFIG_TEMPLATE = """global
log {{ config.log_target }} local0 debug
stats socket {{ config.socket_path }} mode 666 level admin expose-fd listeners
Expand All @@ -39,6 +63,13 @@
{%- if config.hard_stop_after_s is not none %}
hard-stop-after {{ config.hard_stop_after_s }}s
{%- endif %}
{%- if config.grpc_enabled %}
tune.h2.max-frame-size {{ config.h2_max_frame_size }}
tune.h2.be.initial-window-size {{config.h2_be_initial_window_size}}
tune.h2.be.max-concurrent-streams {{config.h2_be_max_concurrent_streams}}
tune.h2.fe.initial-window-size {{config.h2_fe_initial_window_size}}
tune.h2.fe.max-concurrent-streams {{config.h2_fe_max_concurrent_streams}}
{%- endif %}
defaults
mode http
option log-health-checks
Expand Down Expand Up @@ -218,6 +249,106 @@
{%- endif %}
{%- endif %}
{%- endfor %}
{%- if config.grpc_enabled %}
frontend grpc_frontend
# gRPC requires HTTP/2. HAProxy decodes H2 frames into HTTP request
# semantics in `mode http` when `proto h2` is on the bind line.
bind {{ config.grpc_frontend_host }}:{{ config.grpc_frontend_port }} proto h2
mode http
log global

{{ grpc_healthz_rules|safe }}

# ListApplications must aggregate across all apps, so it goes to the
# head-node fallback Serve proxy rather than an individual replica.
acl is_list_applications path /ray.serve.RayServeAPIService/ListApplications
{%- if grpc_fallback_backend_with_health_config %}
use_backend grpc_fallback_backend if is_list_applications
{%- else %}
http-request return status 200 content-type application/grpc hdr grpc-status 14 hdr grpc-message "ListApplications is unavailable" if is_list_applications
{%- endif %}

# Route per-app on the `application` metadata that Ray Serve clients attach.
{%- for backend in grpc_backends %}
acl is_{{ backend.name or 'unknown' }} req.hdr(application) -m str {{ backend.app_name }}
use_backend {{ backend.name or 'unknown' }} if is_{{ backend.name or 'unknown' }}
{%- endfor %}
{%- if grpc_backends|length == 1 %}
# With exactly one app deployed, route there regardless of metadata so
# clients can call it without setting the `application` header.
default_backend {{ grpc_backends[0].name or 'unknown' }}
{%- else %}
# Zero apps, or multiple apps without a matching `application` header.
default_backend default_grpc_backend
{%- endif %}

{%- if grpc_fallback_backend_with_health_config %}
{%- set backend = grpc_fallback_backend_with_health_config.backend %}
{%- set hc = grpc_fallback_backend_with_health_config.health_config %}
{%- if backend.servers %}
{%- set server = backend.servers[0] %}
backend grpc_fallback_backend
mode http
log global
# gRPC health check: replay a complete unary `Healthz` request via
# `tcp-check send-binary` and match the healthy message in the response.
# `http-check` can't be used because its body is truncated at the first NUL
# byte and a gRPC frame always starts with the NUL compression flag, so the
# server would get a message-less unary and stall until timeout.
option tcp-check
tcp-check connect
tcp-check send-binary {{ hc.grpc_healthcheck_request_hex }}
tcp-check expect binary {{ hc.grpc_healthcheck_expect_hex }}
{{ hc.default_server_directive }}
# `proto h2` makes HAProxy speak HTTP/2 cleartext to the fallback gRPC server.
server {{ server.name }} {{ server.host }}:{{ server.port }} proto h2 check
{%- endif %}
{%- endif %}
{%- if grpc_backends|length != 1 %}
backend default_grpc_backend
mode http
log global
# Trailers-only NOT_FOUND. gRPC clients surface this as
# grpc.StatusCode.NOT_FOUND; an HTTP 503 would map to UNAVAILABLE instead.
acl has_application_header req.hdr(application) -m found
http-request return status 200 content-type application/grpc hdr grpc-status 5 hdr grpc-message "Application '%[req.hdr(application)]' not found. Ping /ray.serve.RayServeAPIService/ListApplications for available applications." if has_application_header
http-request return status 200 content-type application/grpc hdr grpc-status 5 hdr grpc-message "Application metadata not set. Ping /ray.serve.RayServeAPIService/ListApplications for available applications."
{%- endif %}
{%- for item in grpc_backends_with_health_config %}
{%- set backend = item.backend %}
{%- set hc = item.health_config %}
backend {{ backend.name or 'unknown' }}
mode http
log global
http-reuse always
{%- if backend.timeout_connect_s is not none %}
timeout connect {{ backend.timeout_connect_s }}s
{%- endif %}
{%- if backend.timeout_server_s is not none %}
timeout server {{ backend.timeout_server_s }}s
{%- endif %}
{%- if backend.timeout_client_s is not none %}
timeout client {{ backend.timeout_client_s }}s
{%- endif %}
# gRPC health check: replay a complete unary `Healthz` request via
# `tcp-check send-binary` and match the healthy message in the response.
# `http-check` can't be used because its body is truncated at the first NUL
# byte and a gRPC frame always starts with the NUL compression flag, so the
# server would get a message-less unary and stall until timeout.
option tcp-check
tcp-check connect
tcp-check send-binary {{ hc.grpc_healthcheck_request_hex }}
tcp-check expect binary {{ hc.grpc_healthcheck_expect_hex }}
{{ hc.default_server_directive }}
# `proto h2` makes HAProxy speak HTTP/2 cleartext to backend gRPC servers.
{%- for server in backend.servers %}
server {{ server.name }} {{ server.host }}:{{ server.port }} proto h2 check
{%- endfor %}
{%- if backend.fallback_server %}
server {{ backend.fallback_server.name }} {{ backend.fallback_server.host }}:{{ backend.fallback_server.port }} proto h2 check backup
{%- endif %}
Comment thread
akyang-anyscale marked this conversation as resolved.
{%- endfor %}
{%- endif %}
listen stats
bind *:{{ config.stats_port }}
stats enable
Expand Down
5 changes: 4 additions & 1 deletion python/ray/serve/_private/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
RunningReplicaInfo,
)
from ray.serve._private.constants import (
RAY_SERVE_ENABLE_HA_PROXY,
SERVE_DEFAULT_APP_NAME,
SERVE_NAMESPACE,
)
Expand Down Expand Up @@ -1001,7 +1002,9 @@ def ping_grpc_healthz(channel, test_draining=False):
else:
response, call = stub.Healthz.with_call(request=request)
assert call.code() == grpc.StatusCode.OK
assert response.message == "success"
if not RAY_SERVE_ENABLE_HA_PROXY:
assert response.message == "success"
return True


def ping_grpc_call_method(channel, app_name, test_not_found=False):
Expand Down
4 changes: 2 additions & 2 deletions python/ray/serve/tests/test_cli_4.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def test_serving_request_through_grpc_proxy(ray_start_stop):
)

# Ensures Healthz method succeeding.
ping_grpc_healthz(channel)
wait_for_condition(ping_grpc_healthz, channel=channel)

# Ensures a custom defined method is responding correctly.
ping_grpc_call_method(channel, app1)
Expand Down Expand Up @@ -148,7 +148,7 @@ def test_grpc_proxy_model_composition(ray_start_stop):
)

# Ensures Healthz method succeeding.
ping_grpc_healthz(channel)
wait_for_condition(ping_grpc_healthz, channel=channel)

# Ensure model composition is responding correctly.
ping_fruit_stand(channel, app)
Expand Down
Loading
Loading