Open
Description
What happened + What you expected to happen
A `ray.serve.exceptions.BackPressureError` raised from a Child deployment is not handled correctly by its Parent deployment if that Parent is wrapped in a FastAPI ingress. As a result, the client receives a 500 instead of the expected 503. Of note, this seems to occur only if both of the following are true:
- the Parent deployment is wrapped in a FastAPI ingress
- the `BackPressureError` is raised by a Child deployment
repro
import asyncio
import fastapi
from ray import serve
import ray.serve.exceptions
# FastAPI application used as the HTTP ingress for the Parent deployment.
fastapi_app = fastapi.FastAPI()
@serve.deployment(max_ongoing_requests=1, max_queued_requests=1)
class Child:
    """Downstream deployment that is deliberately easy to saturate.

    It is configured with ``max_ongoing_requests=1`` and
    ``max_queued_requests=1``; the traceback in this report shows further
    calls being dropped with ``BackPressureError`` once the queue is full.
    """

    async def __call__(self) -> None:
        # Hold the request for two seconds so that concurrent calls pile up
        # behind it and exceed the queue limit.
        await asyncio.sleep(2.)
@serve.deployment
@serve.ingress(fastapi_app)
class Parent:
    """Ingress deployment: exposes ``GET /`` through FastAPI and forwards to ``child``."""

    def __init__(self, child) -> None:
        # ``child`` is whatever was passed as ``Parent.bind(child=...)``;
        # it is called via ``.remote()`` in the route below.
        self.child = child

    @fastapi_app.get("/")
    async def _(self) -> None:
        # The BackPressureError from the child call is not caught here; per
        # the traceback it propagates through the FastAPI/Starlette stack and
        # the request is logged as a 500.
        return await self.child.remote()
# Deploy the application: Parent is bound with Child as its ``child`` constructor argument.
serve.run(Parent.bind(child=Child.bind()))
# -- repro
import requests
from concurrent.futures import ThreadPoolExecutor
# Let Serve start: wait two seconds before sending any requests.
import time
time.sleep(2.)
def get(x: int) -> None:
    """Send one GET request to the local Serve endpoint and print the status code.

    ``x`` is unused; it is accepted only so the function can be handed to
    ``Executor.map`` together with an iterable.
    """
    # Plain string literal: the original f-string had no placeholders.
    resp = requests.get("http://localhost:8000/")
    print(resp.status_code)
# Issue four requests concurrently -- more than Child's one ongoing plus one
# queued request can absorb -- so that some calls hit backpressure.
with ThreadPoolExecutor(max_workers=4) as executor:
    # list() drains the lazy map iterator so every request completes (and any
    # exception surfaces) before the pool shuts down.
    list(executor.map(get, range(4)))
relevant stacktrace
(ServeReplica:default:Parent pid=35932) ERROR 2025-03-06 16:17:52,466 default_Parent 92j6p785 fed4636b-b139-4ae8-9c0a-c78303d3d0da -- Request failed.
(ServeReplica:default:Parent pid=35932) Traceback (most recent call last):
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 472, in _handle_errors_and_metrics
(ServeReplica:default:Parent pid=35932) yield _status_code_callback
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 880, in _wrap_user_method_call
(ServeReplica:default:Parent pid=35932) yield status_code_callback
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 646, in handle_request_with_rejection
(ServeReplica:default:Parent pid=35932) async for result in self._call_user_generator(
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 583, in _call_user_generator
(ServeReplica:default:Parent pid=35932) raise e from None
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 1608, in call_user_method
(ServeReplica:default:Parent pid=35932) result, sync_gen_consumed = await self._call_func_or_gen(
(ServeReplica:default:Parent pid=35932) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 1326, in _call_func_or_gen
(ServeReplica:default:Parent pid=35932) result = await result
(ServeReplica:default:Parent pid=35932) ^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/_private/http_util.py", line 502, in __call__
(ServeReplica:default:Parent pid=35932) await self._asgi_app(
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/fastapi/applications.py", line 1054, in __call__
(ServeReplica:default:Parent pid=35932) await super().__call__(scope, receive, send)
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/applications.py", line 112, in __call__
(ServeReplica:default:Parent pid=35932) await self.middleware_stack(scope, receive, send)
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 187, in __call__
(ServeReplica:default:Parent pid=35932) raise exc
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 165, in __call__
(ServeReplica:default:Parent pid=35932) await self.app(scope, receive, _send)
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
(ServeReplica:default:Parent pid=35932) await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
(ServeReplica:default:Parent pid=35932) raise exc
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
(ServeReplica:default:Parent pid=35932) await app(scope, receive, sender)
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 714, in __call__
(ServeReplica:default:Parent pid=35932) await self.middleware_stack(scope, receive, send)
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 734, in app
(ServeReplica:default:Parent pid=35932) await route.handle(scope, receive, send)
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 288, in handle
(ServeReplica:default:Parent pid=35932) await self.app(scope, receive, send)
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 76, in app
(ServeReplica:default:Parent pid=35932) await wrap_app_handling_exceptions(app, request)(scope, receive, send)
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
(ServeReplica:default:Parent pid=35932) raise exc
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
(ServeReplica:default:Parent pid=35932) await app(scope, receive, sender)
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 73, in app
(ServeReplica:default:Parent pid=35932) response = await f(request)
(ServeReplica:default:Parent pid=35932) ^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/fastapi/routing.py", line 301, in app
(ServeReplica:default:Parent pid=35932) raw_response = await run_endpoint_function(
(ServeReplica:default:Parent pid=35932) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
(ServeReplica:default:Parent pid=35932) return await dependant.call(**values)
(ServeReplica:default:Parent pid=35932) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932) File "/Users/paul/workspace/tl-embed-robusto/backpressure_external.py", line 24, in _
(ServeReplica:default:Parent pid=35932) return await self.child.remote()
(ServeReplica:default:Parent pid=35932) ^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/handle.py", line 403, in __await__
(ServeReplica:default:Parent pid=35932) replica_result = yield from self._fetch_future_result_async().__await__()
(ServeReplica:default:Parent pid=35932) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/handle.py", line 283, in _fetch_future_result_async
(ServeReplica:default:Parent pid=35932) self._replica_result = await asyncio.wrap_future(
(ServeReplica:default:Parent pid=35932) ^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/_private/router.py", line 600, in assign_request
(ServeReplica:default:Parent pid=35932) with self._metrics_manager.wrap_request_assignment(request_meta):
(ServeReplica:default:Parent pid=35932) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932) File "/opt/homebrew/Cellar/[email protected]/3.12.7/Frameworks/Python.framework/Versions/3.12/lib/python3.12/contextlib.py", line 137, in __enter__
(ServeReplica:default:Parent pid=35932) return next(self.gen)
(ServeReplica:default:Parent pid=35932) ^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932) File ".venv/lib/python3.12/site-packages/ray/serve/_private/router.py", line 139, in wrap_request_assignment
(ServeReplica:default:Parent pid=35932) raise e
(ServeReplica:default:Parent pid=35932) ray.serve.exceptions.BackPressureError: Request dropped due to backpressure (num_queued_requests=1, max_queued_requests=1).
(ServeReplica:default:Parent pid=35932) INFO 2025-03-06 16:17:52,467 default_Parent 92j6p785 fed4636b-b139-4ae8-9c0a-c78303d3d0da -- GET / 500 24.4ms
Versions / Dependencies
ray==2.43.0
python==3.12
Reproduction script
see above!
Issue Severity
Low: It annoys or frustrates me.