|
14 | 14 |
|
15 | 15 | import asyncio |
16 | 16 | import os |
17 | | -from http import HTTPStatus |
18 | 17 |
|
19 | 18 | import openai |
20 | 19 | import pytest |
21 | 20 | import pytest_asyncio |
22 | | -import requests |
23 | 21 | from utils import RemoteOpenAIServer |
24 | 22 |
|
25 | 23 | MODEL_DIR = os.getenv("REBEL_VLLM_PRE_COMPILED_DIR") |
@@ -139,51 +137,3 @@ async def test_request_wrong_content_type(server: RemoteOpenAIServer): |
139 | 137 | "Content-Type": |
140 | 138 | "application/x-www-form-urlencoded" |
141 | 139 | }) |
142 | | - |
143 | | - |
144 | | -@pytest.mark.parametrize( |
145 | | - "server_args", |
146 | | - [ |
147 | | - pytest.param(["--enable-server-load-tracking"], |
148 | | - id="enable-server-load-tracking") |
149 | | - ], |
150 | | - indirect=True, |
151 | | -) |
152 | | -@pytest.mark.asyncio |
153 | | -async def test_server_load(server: RemoteOpenAIServer): |
154 | | - # Check initial server load |
155 | | - response = requests.get(server.url_for("load")) |
156 | | - assert response.status_code == HTTPStatus.OK |
157 | | - assert response.json().get("server_load") == 0 |
158 | | - |
159 | | - def make_long_completion_request(): |
160 | | - return requests.post( |
161 | | - server.url_for("v1/completions"), |
162 | | - headers={"Content-Type": "application/json"}, |
163 | | - json={ |
164 | | - "prompt": "Give me a long story", |
165 | | - "max_tokens": MAX_TOKENS, |
166 | | - "temperature": 0, |
167 | | - }, |
168 | | - ) |
169 | | - |
170 | | - # Start the completion request in a background thread. |
171 | | - completion_future = asyncio.create_task( |
172 | | - asyncio.to_thread(make_long_completion_request)) |
173 | | - |
174 | | - # Give a short delay to ensure the request has started. |
175 | | - await asyncio.sleep(0.1) |
176 | | - |
177 | | - # Check server load while the completion request is running. |
178 | | - response = requests.get(server.url_for("load")) |
179 | | - assert response.status_code == HTTPStatus.OK |
180 | | - assert response.json().get("server_load") == 1 |
181 | | - |
182 | | - # Wait for the completion request to finish. |
183 | | - await completion_future |
184 | | - await asyncio.sleep(0.1) |
185 | | - |
186 | | - # Check server load after the completion request has finished. |
187 | | - response = requests.get(server.url_for("load")) |
188 | | - assert response.status_code == HTTPStatus.OK |
189 | | - assert response.json().get("server_load") == 0 |
0 commit comments