Skip to content

[Serve] ray.serve.exceptions.BackPressureError raised from child causes 500 with FastAPI Ingress #51145

Open
@paul-twelvelabs

Description

@paul-twelvelabs

What happened + What you expected to happen

A ray.serve.exceptions.BackPressureError raised from a Child deployment will not be handled correctly by its Parent deployment if that Parent is wrapped in a FastAPI Ingress. This results in a client 500 instead of 503. Of note, it seems that this only occurs if both of the following are true:

  1. the Parent deployment is wrapped in a FastAPI ingress
  2. the BackPressureError is raised by a Child deployment

repro

import asyncio

import fastapi
from ray import serve
import ray.serve.exceptions

# FastAPI application that the Parent deployment below is attached to via serve.ingress.
fastapi_app = fastapi.FastAPI()


@serve.deployment(max_ongoing_requests=1, max_queued_requests=1)
class Child:
    """Slow downstream deployment used to trigger backpressure.

    Configured with ``max_ongoing_requests=1`` and ``max_queued_requests=1``;
    the stacktrace in this report shows requests beyond the queue limit being
    dropped with ``BackPressureError``.
    """

    async def __call__(self) -> None:
        """Sleep for two seconds so that concurrent callers pile up."""
        await asyncio.sleep(2.)


@serve.deployment
@serve.ingress(fastapi_app)
class Parent:
    """FastAPI-ingress deployment that forwards every request to ``child``."""

    def __init__(self, child) -> None:
        # `child` is whatever Child.bind() resolves to when passed through
        # Parent.bind(child=...); it is called via `.remote()` below.
        self.child = child

    # GET / handler: awaits the child call and returns its result. Any
    # exception raised while awaiting the child propagates out of this
    # handler unhandled (this is the frame shown in the reported stacktrace).
    @fastapi_app.get("/")
    async def _(self) -> None:
        return await self.child.remote()


# Deploy Parent, passing a bound Child deployment as its `child` constructor argument.
serve.run(Parent.bind(child=Child.bind()))

# -- repro
import requests
from concurrent.futures import ThreadPoolExecutor

# let serve start
import time
time.sleep(2.)

def get(x: int) -> None:
    """Send one GET request to the local Serve endpoint and print its status code.

    Args:
        x: Unused. Present only so the function can be mapped over a range
            by an executor.
    """
    # Plain string literal: the URL has no placeholders, so no f-string is needed.
    resp = requests.get("http://localhost:8000/")
    print(resp.status_code)


# Issue four requests concurrently: more than the Child deployment's
# configured limits of one ongoing plus one queued request.
with ThreadPoolExecutor(max_workers=4) as pool:
    # Drain the iterator so every call completes and any exception surfaces here.
    for _ in pool.map(get, range(4)):
        pass

relevant stacktrace

(ServeReplica:default:Parent pid=35932) ERROR 2025-03-06 16:17:52,466 default_Parent 92j6p785 fed4636b-b139-4ae8-9c0a-c78303d3d0da -- Request failed.
(ServeReplica:default:Parent pid=35932) Traceback (most recent call last):
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 472, in _handle_errors_and_metrics
(ServeReplica:default:Parent pid=35932)     yield _status_code_callback
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 880, in _wrap_user_method_call
(ServeReplica:default:Parent pid=35932)     yield status_code_callback
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 646, in handle_request_with_rejection
(ServeReplica:default:Parent pid=35932)     async for result in self._call_user_generator(
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 583, in _call_user_generator
(ServeReplica:default:Parent pid=35932)     raise e from None
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 1608, in call_user_method
(ServeReplica:default:Parent pid=35932)     result, sync_gen_consumed = await self._call_func_or_gen(
(ServeReplica:default:Parent pid=35932)                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/_private/replica.py", line 1326, in _call_func_or_gen
(ServeReplica:default:Parent pid=35932)     result = await result
(ServeReplica:default:Parent pid=35932)              ^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/_private/http_util.py", line 502, in __call__
(ServeReplica:default:Parent pid=35932)     await self._asgi_app(
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/fastapi/applications.py", line 1054, in __call__
(ServeReplica:default:Parent pid=35932)     await super().__call__(scope, receive, send)
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/applications.py", line 112, in __call__
(ServeReplica:default:Parent pid=35932)     await self.middleware_stack(scope, receive, send)
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 187, in __call__
(ServeReplica:default:Parent pid=35932)     raise exc
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 165, in __call__
(ServeReplica:default:Parent pid=35932)     await self.app(scope, receive, _send)
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
(ServeReplica:default:Parent pid=35932)     await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
(ServeReplica:default:Parent pid=35932)     raise exc
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
(ServeReplica:default:Parent pid=35932)     await app(scope, receive, sender)
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 714, in __call__
(ServeReplica:default:Parent pid=35932)     await self.middleware_stack(scope, receive, send)
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 734, in app
(ServeReplica:default:Parent pid=35932)     await route.handle(scope, receive, send)
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 288, in handle
(ServeReplica:default:Parent pid=35932)     await self.app(scope, receive, send)
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 76, in app
(ServeReplica:default:Parent pid=35932)     await wrap_app_handling_exceptions(app, request)(scope, receive, send)
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
(ServeReplica:default:Parent pid=35932)     raise exc
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
(ServeReplica:default:Parent pid=35932)     await app(scope, receive, sender)
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/starlette/routing.py", line 73, in app
(ServeReplica:default:Parent pid=35932)     response = await f(request)
(ServeReplica:default:Parent pid=35932)                ^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/fastapi/routing.py", line 301, in app
(ServeReplica:default:Parent pid=35932)     raw_response = await run_endpoint_function(
(ServeReplica:default:Parent pid=35932)                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
(ServeReplica:default:Parent pid=35932)     return await dependant.call(**values)
(ServeReplica:default:Parent pid=35932)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932)   File "/Users/paul/workspace/tl-embed-robusto/backpressure_external.py", line 24, in _
(ServeReplica:default:Parent pid=35932)     return await self.child.remote()
(ServeReplica:default:Parent pid=35932)            ^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/handle.py", line 403, in __await__
(ServeReplica:default:Parent pid=35932)     replica_result = yield from self._fetch_future_result_async().__await__()
(ServeReplica:default:Parent pid=35932)                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/handle.py", line 283, in _fetch_future_result_async
(ServeReplica:default:Parent pid=35932)     self._replica_result = await asyncio.wrap_future(
(ServeReplica:default:Parent pid=35932)                            ^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/_private/router.py", line 600, in assign_request
(ServeReplica:default:Parent pid=35932)     with self._metrics_manager.wrap_request_assignment(request_meta):
(ServeReplica:default:Parent pid=35932)          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932)   File "/opt/homebrew/Cellar/python@3.12/3.12.7/Frameworks/Python.framework/Versions/3.12/lib/python3.12/contextlib.py", line 137, in __enter__
(ServeReplica:default:Parent pid=35932)     return next(self.gen)
(ServeReplica:default:Parent pid=35932)            ^^^^^^^^^^^^^^
(ServeReplica:default:Parent pid=35932)   File ".venv/lib/python3.12/site-packages/ray/serve/_private/router.py", line 139, in wrap_request_assignment
(ServeReplica:default:Parent pid=35932)     raise e
(ServeReplica:default:Parent pid=35932) ray.serve.exceptions.BackPressureError: Request dropped due to backpressure (num_queued_requests=1, max_queued_requests=1).
(ServeReplica:default:Parent pid=35932) INFO 2025-03-06 16:17:52,467 default_Parent 92j6p785 fed4636b-b139-4ae8-9c0a-c78303d3d0da -- GET / 500 24.4ms

Versions / Dependencies

ray==2.43.0
python==3.12

Reproduction script

see above!

Issue Severity

Low: It annoys or frustrates me.

Metadata

Metadata

Assignees

Labels

P1 (Issue that should be fixed within a few weeks) · bug (Something that is supposed to be working; but isn't) · serve (Ray Serve Related Issue)

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions