Skip to content

Commit 9bfa2f2

Browse files
joshyjoseph, Jooho, sivanantha321
authored
fix: make Uvicorn event loop configurable (kserve#4971)
Signed-off-by: Joshy Joseph <joshyjoseph.act@gmail.com>
Co-authored-by: Jooho Lee <jlee@redhat.com>
Co-authored-by: Sivanantham <90966311+sivanantha321@users.noreply.github.com>
1 parent 38536cb commit 9bfa2f2

6 files changed

Lines changed: 174 additions & 2 deletions

File tree

python/kserve/docs/EventLoop.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# HTTP Server Event Loop Configuration
2+
3+
KServe allows configuring the event loop implementation used by the HTTP server.
4+
This can be useful for performance tuning or for ensuring compatibility with
5+
specific runtime environments.
6+
7+
## Configuration
8+
9+
The event loop is configured using the `--event-loop` command-line argument, or programmatically via the `event_loop` parameter of `ModelServer`.
10+
11+
Supported values:
12+
13+
- `auto` (default): Automatically select the event loop. If `uvloop` is installed,
14+
it will be used; otherwise, the standard `asyncio` event loop is used.
15+
- `asyncio`: Force the use of Python’s built-in `asyncio` event loop.
16+
- `uvloop`: Force the use of `uvloop` (requires `uvloop` to be installed).
17+
18+
## Example
19+
20+
```bash
21+
kserve start \
22+
--event-loop uvloop \
23+
--http_port 8080
24+
```
25+
26+
```python
27+
from kserve import ModelServer
28+
29+
server = ModelServer(
30+
http_port=8080,
31+
event_loop="uvloop", # "auto", "asyncio", or "uvloop"
32+
)
33+
34+
# Register models and start the server
35+
server.start(models=[])
36+
```

python/kserve/kserve/model_server.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,18 @@
116116
help="The asgi access logging format. It allows to override only the `uvicorn.access`'s format configuration "
117117
"with a richer set of fields",
118118
)
119+
parser.add_argument(
120+
"--event-loop",
121+
dest="event_loop",
122+
default="auto",
123+
type=str,
124+
choices=["auto", "asyncio", "uvloop"],
125+
help=(
126+
"Event loop implementation used by the HTTP server. "
127+
"Valid values are 'auto' (default; use uvloop if available), "
128+
"'asyncio', or 'uvloop'."
129+
),
130+
)
119131

120132
# Model arguments: The arguments are passed to the kserve.Model object
121133
parser.add_argument(
@@ -206,6 +218,7 @@ def __init__(
206218
enable_docs_url: bool = args.enable_docs_url,
207219
enable_latency_logging: bool = args.enable_latency_logging,
208220
access_log_format: str = args.access_log_format,
221+
event_loop: str = args.event_loop,
209222
grace_period: int = 30,
210223
predictor_config: Optional[PredictorConfig] = None,
211224
):
@@ -227,6 +240,7 @@ def __init__(
227240
ASGI specs that don't describe how access logging should be implemented in detail
228241
(please refer to this Uvicorn
229242
[github issue](https://github.com/encode/uvicorn/issues/527) for more info).
243+
event_loop: Event loop implementation used by Uvicorn. One of "auto", "asyncio" or "uvloop". Default: ``'auto'``.
230244
grace_period: The grace period in seconds to wait for the server to stop. Default: ``30``.
231245
predictor_config: Optional configuration for the predictor. Default: ``None``.
232246
"""
@@ -236,6 +250,7 @@ def __init__(
236250
self.http_port = http_port
237251
self.grpc_port = grpc_port
238252
self.workers = workers
253+
self.event_loop = event_loop
239254
self.max_threads = max_threads
240255
self.max_asyncio_workers = max_asyncio_workers
241256
self.enable_grpc = enable_grpc
@@ -317,6 +332,7 @@ def start(self, models: List[BaseKServeModel]) -> None:
317332
workers=self.workers,
318333
grace_period=self.grace_period,
319334
log_config_file=args.log_config_file,
335+
event_loop=self.event_loop,
320336
)
321337
self.servers.append(self._rest_multiprocess_server.start())
322338
else:
@@ -328,6 +344,7 @@ def start(self, models: List[BaseKServeModel]) -> None:
328344
access_log_format=self.access_log_format,
329345
workers=self.workers,
330346
grace_period=self.grace_period,
347+
event_loop=self.event_loop,
331348
)
332349
self.servers.append(self._rest_server.start())
333350
if self.enable_grpc:

python/kserve/kserve/protocol/rest/multiprocess/server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ def __init__(
132132
workers: int = 1,
133133
grace_period: int = 30,
134134
log_config_file: Optional[str] = None,
135+
event_loop: str = "auto",
135136
) -> None:
136137
self.log_config_file = log_config_file
137138
self._rest_server = RESTServer(
@@ -142,6 +143,7 @@ def __init__(
142143
access_log_format,
143144
workers,
144145
grace_period,
146+
event_loop,
145147
)
146148
self._processes: List[RESTServerProcess] = []
147149
self.should_exit = asyncio.Event()

python/kserve/kserve/protocol/rest/server.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ def timing(self, metric_name, timing, tags):
6363
trace_logger.info(f"{metric_name}: {timing} {tags}")
6464

6565

66+
VALID_UVICORN_LOOPS = {"auto", "asyncio", "uvloop"}
67+
68+
6669
class RESTServer:
6770
def __init__(
6871
self,
@@ -73,10 +76,18 @@ def __init__(
7376
access_log_format: Optional[str] = None,
7477
workers: int = 1,
7578
grace_period: int = 30,
79+
event_loop: str = "auto",
7680
):
7781
self.dataplane = data_plane
7882
self.model_repository_extension = model_repository_extension
7983
self.access_log_format = access_log_format
84+
if event_loop not in VALID_UVICORN_LOOPS:
85+
logger.error(
86+
f"Invalid event loop specified: '{event_loop}'. "
87+
f"Supported values are {VALID_UVICORN_LOOPS}, "
88+
f"Falling back to 'auto'."
89+
)
90+
event_loop = "auto"
8091
self.config = uvicorn.Config(
8192
app,
8293
host="0.0.0.0",
@@ -86,7 +97,7 @@ def __init__(
8697
# configured by kserve.
8798
log_config=None,
8899
timeout_graceful_shutdown=grace_period,
89-
loop="asyncio",
100+
loop=event_loop,
90101
)
91102
self._server = uvicorn.Server(self.config)
92103

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright 2023 The KServe Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from unittest.mock import Mock
16+
17+
import pytest
18+
19+
from kserve.protocol.rest import server as rest_mod
20+
21+
22+
@pytest.mark.parametrize(
23+
"loop_value,expected",
24+
[
25+
("auto", "auto"),
26+
("asyncio", "asyncio"),
27+
("uvloop", "uvloop"),
28+
("invalid-value", "auto"), # invalid falls back to 'auto'
29+
],
30+
)
31+
def test_config_loop_value(loop_value, expected, monkeypatch):
32+
monkeypatch.setattr(rest_mod.RESTServer, "create_application", lambda self: None)
33+
data_plane = Mock()
34+
model_repo_ext = Mock()
35+
36+
rs = rest_mod.RESTServer(
37+
app="dummy:app",
38+
data_plane=data_plane,
39+
model_repository_extension=model_repo_ext,
40+
http_port=8080,
41+
event_loop=loop_value,
42+
)
43+
44+
assert rs.config.loop == expected

python/kserve/test/test_server.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import re
2121
from typing import Dict
2222
from unittest import mock
23+
from unittest.mock import Mock
2324

2425
import avro.io
2526
import avro.schema
@@ -33,7 +34,7 @@
3334
from fastapi.testclient import TestClient
3435
from ray import serve
3536

36-
from kserve import Model, ModelRepository, ModelServer
37+
from kserve import Model, ModelRepository, ModelServer, model_server
3738
from kserve.constants.constants import (
3839
FASTAPI_APP_IMPORT_STRING,
3940
INFERENCE_CONTENT_LENGTH_HEADER,
@@ -1412,3 +1413,64 @@ def test_model_server_backwards_compatibility(self):
14121413

14131414
# Context should handle None predictor config gracefully
14141415
# The DataPlane should still be functional even without predictor config
1416+
1417+
1418+
@pytest.mark.asyncio
1419+
class TestModelServerEventLoopWithDummy:
1420+
async def test_start_passes_event_loop_to_rest_server(self, monkeypatch):
1421+
created = {}
1422+
1423+
class DummyREST:
1424+
def __init__(self, *args, **kwargs):
1425+
created["instance"] = self
1426+
self.event_loop = kwargs.get("event_loop")
1427+
1428+
def start(self):
1429+
return None
1430+
1431+
# Patch to capture constructor and avoid side effects
1432+
monkeypatch.setattr(model_server, "RESTServer", DummyREST)
1433+
monkeypatch.setattr(model_server.asyncio, "run", Mock())
1434+
monkeypatch.setattr(
1435+
model_server.ModelServer, "setup_event_loop", lambda _: None
1436+
)
1437+
monkeypatch.setattr(
1438+
model_server.ModelServer, "register_signal_handler", lambda _: None
1439+
)
1440+
1441+
ms = model_server.ModelServer(workers=1, event_loop="uvloop")
1442+
m = DummyModel("TestModel")
1443+
m.load()
1444+
ms.start(models=[m])
1445+
1446+
assert isinstance(created.get("instance"), DummyREST)
1447+
assert created["instance"].event_loop == "uvloop"
1448+
1449+
async def test_start_passes_event_loop_to_rest_multiprocess(self, monkeypatch):
1450+
created = {}
1451+
1452+
class DummyMulti:
1453+
def __init__(self, *args, **kwargs):
1454+
created["instance"] = self
1455+
self.event_loop = kwargs.get("event_loop")
1456+
1457+
def start(self):
1458+
return None
1459+
1460+
# Patch multiprocess REST server and side effects
1461+
monkeypatch.setattr(model_server, "RESTServerMultiProcess", DummyMulti)
1462+
monkeypatch.setattr(model_server.asyncio, "run", Mock())
1463+
monkeypatch.setattr(
1464+
model_server.ModelServer, "setup_event_loop", lambda _: None
1465+
)
1466+
monkeypatch.setattr(
1467+
model_server.ModelServer, "register_signal_handler", lambda _: None
1468+
)
1469+
1470+
ms = model_server.ModelServer(workers=4, event_loop="asyncio")
1471+
m = DummyModel("TestModel")
1472+
m.load()
1473+
ms.start(models=[m])
1474+
1475+
assert isinstance(created.get("instance"), DummyMulti)
1476+
assert created["instance"].event_loop == "asyncio"

0 commit comments

Comments
 (0)