Skip to content

Commit c054b43

Browse files
tests(maas-billing): add rate-limit tests for TinyLlama (free & premium) (#924)
* tests(maas-billing): add rate-limit tests for TinyLlama (free & premium)
* review comment implemented and added limit patch
* test(maas-billing): address review feedback in utils
* Add MaaS request/token rate limit tests and policy resources
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci
* test(maas-billing): updated utils
* tests(maas-billing): address review feedback in utils
* tests(maas-billing): address review feedback

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent bf00604 commit c054b43

File tree

7 files changed

+605
-22
lines changed

7 files changed

+605
-22
lines changed

tests/model_serving/model_server/maas_billing/conftest.py

Lines changed: 82 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from typing import Generator
2-
1+
from typing import Generator, Dict, List
32
import base64
43
import pytest
54
import requests
@@ -23,8 +22,7 @@
2322
from utilities.infra import login_with_user_password, get_openshift_token
2423
from utilities.general import wait_for_oauth_openshift_deployment
2524
from ocp_resources.secret import Secret
26-
27-
25+
from tests.model_serving.model_server.maas_billing.utils import get_total_tokens
2826
from tests.model_serving.model_server.maas_billing.utils import (
2927
detect_scheme_via_llmisvc,
3028
host_from_ingress_domain,
@@ -34,6 +32,8 @@
3432
create_maas_group,
3533
build_maas_headers,
3634
get_maas_models_response,
35+
verify_chat_completions,
36+
maas_gateway_rate_limits_patched,
3737
)
3838

3939

@@ -81,8 +81,10 @@ def model_url(
8181
maas_scheme: str,
8282
maas_host: str,
8383
admin_client: DynamicClient,
84+
maas_inference_service_tinyllama: LLMInferenceService,
8485
) -> str:
8586
deployment = llmis_name(client=admin_client)
87+
# deployment = maas_inference_service_tinyllama.name
8688
return f"{maas_scheme}://{maas_host}/llm/{deployment}{CHAT_COMPLETIONS}"
8789

8890

@@ -374,7 +376,7 @@ def maas_premium_group(
374376
yield group.name
375377

376378

377-
@pytest.fixture
379+
@pytest.fixture(scope="class")
378380
def ocp_token_for_actor(
379381
request,
380382
maas_api_server_url: str,
@@ -421,7 +423,7 @@ def ocp_token_for_actor(
421423
assert original_login_successful, f"Failed to log back in as original user '{original_user}'"
422424

423425

424-
@pytest.fixture
426+
@pytest.fixture(scope="class")
425427
def maas_token_for_actor(
426428
request_session_http: requests.Session,
427429
base_url: str,
@@ -449,13 +451,13 @@ def maas_token_for_actor(
449451
return token
450452

451453

452-
@pytest.fixture
454+
@pytest.fixture(scope="class")
def maas_headers_for_actor(maas_token_for_actor: str) -> dict:
    """Return the MaaS request headers built from the current actor's token.

    The actor is whichever one (admin/free/premium) ``maas_token_for_actor``
    was resolved for.
    """
    actor_headers = build_maas_headers(token=maas_token_for_actor)
    return actor_headers
456458

457459

458-
@pytest.fixture
460+
@pytest.fixture(scope="class")
459461
def maas_models_response_for_actor(
460462
request_session_http: requests.Session,
461463
base_url: str,
@@ -469,6 +471,60 @@ def maas_models_response_for_actor(
469471
)
470472

471473

474+
@pytest.fixture(scope="class")
def maas_models_for_actor(
    maas_models_response_for_actor: requests.Response,
) -> List[Dict]:
    """Return the entries under ``data`` in the actor's models response.

    Raises:
        AssertionError: If the ``data`` entry is missing or empty.
    """
    response_payload = maas_models_response_for_actor.json()
    available_models = response_payload.get("data", [])
    assert available_models, "no models returned from /v1/models"
    return available_models
482+
483+
484+
@pytest.fixture(scope="class")
def exercise_rate_limiter(
    actor_label: str,
    scenario: dict,
    request_session_http: requests.Session,
    model_url: str,
    maas_headers_for_actor: Dict[str, str],
    maas_models_for_actor: List[Dict],
) -> List[int]:
    """Send a series of chat-completion requests and collect their status codes.

    Args:
        actor_label: Label of the actor under test; used in log messages only.
        scenario: Scenario settings. The keys read here are ``id``,
            ``max_requests``, ``max_tokens`` and ``log_prefix``.
        request_session_http: HTTP session passed to ``verify_chat_completions``.
        model_url: Chat-completions URL of the model under test.
        maas_headers_for_actor: Request headers for the current actor.
        maas_models_for_actor: Model entries passed to ``verify_chat_completions``.

    Returns:
        The HTTP status code of every request, in the order they were sent.
    """
    max_requests = scenario["max_requests"]
    max_tokens = scenario["max_tokens"]
    actor_prefix = f"{scenario['log_prefix']}[{actor_label}]"
    # Loop-invariant: only the token-rate scenario inspects token usage.
    is_token_rate = scenario["id"] == "token-rate"

    # NOTE(review): the scenario dicts also carry "sleep_between_seconds", but
    # it is not read here, so requests are sent back to back - confirm intended.
    status_codes_list: List[int] = []

    for attempt_number in range(1, max_requests + 1):
        LOGGER.info(f"{actor_prefix}: attempt {attempt_number}/{max_requests}")

        response = verify_chat_completions(
            request_session_http=request_session_http,
            model_url=model_url,
            headers=maas_headers_for_actor,
            models_list=maas_models_for_actor,
            prompt_text="Repeat the word 'token' 60 times, separated by spaces. No extra text.",
            max_tokens=max_tokens,
            request_timeout_seconds=60,
            log_prefix=actor_prefix,
            expected_status_codes=(200, 429),
        )
        status_codes_list.append(response.status_code)

        # Token usage is fetched once, and strictly, only where it is logged;
        # the earlier lenient lookup produced a value that was never used.
        if is_token_rate and response.status_code == 200:
            total_tokens = get_total_tokens(resp=response, fail_if_missing=True)
            LOGGER.info(f"{actor_prefix}: total_tokens={total_tokens}")

    LOGGER.info(f"{actor_prefix}: status_codes={status_codes_list}")
    return status_codes_list
526+
527+
472528
@pytest.fixture(scope="class")
473529
def maas_inference_service_tinyllama(
474530
admin_client: DynamicClient,
@@ -530,6 +586,23 @@ def maas_scheme(admin_client: DynamicClient, unprivileged_model_namespace: Names
530586
)
531587

532588

533-
@pytest.fixture(scope="session")
589+
@pytest.fixture(scope="class")
def maas_host(admin_client):
    """Return the host that ``host_from_ingress_domain`` resolves for ``admin_client``."""
    ingress_host = host_from_ingress_domain(client=admin_client)
    return ingress_host
592+
593+
594+
@pytest.fixture(scope="class")
def maas_gateway_rate_limits(
    admin_client: DynamicClient,
) -> Generator[None, None, None]:
    """Hold the ``maas_gateway_rate_limits_patched`` context open for the requesting class.

    The context manager is entered for the token and request policies in the
    ``openshift-ingress`` namespace, and exited when the fixture is torn down.
    """
    policy_targets = {
        "namespace": "openshift-ingress",
        "token_policy_name": "gateway-token-rate-limits",
        "request_policy_name": "gateway-rate-limits",
    }

    with maas_gateway_rate_limits_patched(admin_client=admin_client, **policy_targets):
        yield
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from typing import List
2+
import pytest
3+
from simple_logger.logger import get_logger
4+
from tests.model_serving.model_server.maas_billing.utils import (
5+
assert_mixed_200_and_429,
6+
)
7+
8+
LOGGER = get_logger(name=__name__)

# Number of requests in the request-rate scenario (its "max_requests" value).
REQUEST_RATE_MAX_REQUESTS = 10

# One parameter set per actor: (indirect ``ocp_token_for_actor`` argument, actor label).
ACTORS = [pytest.param({"type": actor}, actor, id=actor) for actor in ("free", "premium")]

# Settings for the request-rate scenario, returned by the class ``scenario`` fixture.
SCENARIO_REQUEST_RATE = {
    "id": "request-rate",
    "max_requests": REQUEST_RATE_MAX_REQUESTS,
    "max_tokens": 5,
    "sleep_between_seconds": 0.1,
    "log_prefix": "MaaS request-rate",
    "context": "request-rate burst",
}
25+
26+
27+
@pytest.mark.usefixtures(
    "maas_inference_service_tinyllama",
    "maas_free_group",
    "maas_premium_group",
    "maas_gateway_rate_limits",
)
@pytest.mark.parametrize(
    "unprivileged_model_namespace",
    [
        pytest.param(
            {"name": "llm", "modelmesh-enabled": False},
            id="maas-billing-namespace",
        )
    ],
    indirect=True,
)
@pytest.mark.parametrize(
    "ocp_token_for_actor, actor_label",
    ACTORS,
    indirect=["ocp_token_for_actor"],
    scope="class",
)
class TestMaasRequestRateLimits:
    """
    MaaS Billing – request-rate limit tests against TinyLlama.

    The class is parametrized over the entries in ``ACTORS``.
    """

    @pytest.fixture(scope="class")
    def scenario(self):
        """Return the request-rate scenario settings."""
        return SCENARIO_REQUEST_RATE

    def test_request_rate_limits(
        self,
        ocp_token_for_actor: str,
        actor_label: str,
        scenario: dict,
        exercise_rate_limiter: List[int],
    ) -> None:
        """Require at least one 429 among the collected status codes."""
        # ``ocp_token_for_actor`` is requested as a fixture; its value is not used here.
        assert_mixed_200_and_429(
            actor_label=actor_label,
            status_codes_list=exercise_rate_limiter,
            context=scenario["context"],
            require_429=True,
        )
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from typing import List
2+
import pytest
3+
from simple_logger.logger import get_logger
4+
from tests.model_serving.model_server.maas_billing.utils import (
5+
assert_mixed_200_and_429,
6+
)
7+
8+
LOGGER = get_logger(name=__name__)

# Number of requests in the token-rate scenario (its "max_requests" value).
TOKEN_RATE_MAX_REQUESTS = 8
# The "max_tokens" value used by the token-rate scenario.
LARGE_MAX_TOKENS = 80

# One parameter set per actor: (indirect ``ocp_token_for_actor`` argument, actor label).
ACTORS = [pytest.param({"type": actor}, actor, id=actor) for actor in ("free", "premium")]

# Settings for the token-rate scenario, returned by the class ``scenario`` fixture.
SCENARIO_TOKEN_RATE = {
    "id": "token-rate",
    "max_requests": TOKEN_RATE_MAX_REQUESTS,
    "max_tokens": LARGE_MAX_TOKENS,
    "sleep_between_seconds": 0.2,
    "log_prefix": "MaaS token-rate",
    "context": "token-rate tests",
}
26+
27+
28+
@pytest.mark.usefixtures(
    "maas_inference_service_tinyllama",
    "maas_free_group",
    "maas_premium_group",
    "maas_gateway_rate_limits",
)
@pytest.mark.parametrize(
    "unprivileged_model_namespace",
    [
        pytest.param(
            {"name": "llm", "modelmesh-enabled": False},
            id="maas-billing-namespace",
        )
    ],
    indirect=True,
)
@pytest.mark.parametrize(
    "ocp_token_for_actor, actor_label",
    ACTORS,
    indirect=["ocp_token_for_actor"],
    scope="class",
)
class TestMaasTokenRateLimits:
    """
    MaaS Billing – token-rate limit tests against TinyLlama.

    The class is parametrized over the entries in ``ACTORS``.
    """

    @pytest.fixture(scope="class")
    def scenario(self):
        """Return the token-rate scenario settings."""
        return SCENARIO_TOKEN_RATE

    def test_token_rate_limits(
        self,
        ocp_token_for_actor: str,
        actor_label: str,
        scenario: dict,
        exercise_rate_limiter: List[int],
    ) -> None:
        """Check the collected status codes without requiring a 429, then log them."""
        # ``ocp_token_for_actor`` is requested as a fixture; its value is not used here.
        observed_codes = exercise_rate_limiter

        assert_mixed_200_and_429(
            actor_label=actor_label,
            status_codes_list=observed_codes,
            context=scenario["context"],
            require_429=False,
        )

        LOGGER.info(f"MaaS token-rate[{actor_label}]: final status_codes={observed_codes}")

0 commit comments

Comments
 (0)