Skip to content

Commit 7fe1998

Browse files
committed
handle URL for inference when running on disconnected cluster
Signed-off-by: threcc <trecchiu@redhat.com>
1 parent 4009f96 commit 7fe1998

File tree

1 file changed

+36
-2
lines changed
  • tests/model_serving/model_server/llmd

1 file changed

+36
-2
lines changed

tests/model_serving/model_server/llmd/utils.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,14 @@
1313
from ocp_resources.llm_inference_service import LLMInferenceService
1414
from ocp_resources.pod import Pod
1515
from ocp_resources.prometheus import Prometheus
16+
from ocp_resources.route import Route
1617
from pyhelper_utils.shell import run_command
1718
from timeout_sampler import retry
1819

1920
from utilities.certificates_utils import get_ca_bundle
2021
from utilities.constants import Timeout
22+
from utilities.infra import is_disconnected_cluster
23+
from utilities.llmd_constants import LLMDGateway, LLMEndpoint
2124
from utilities.monitoring import get_metrics_value
2225

2326
LOGGER = structlog.get_logger(name=__name__)
@@ -75,6 +78,27 @@ def _get_inference_url(llmisvc: LLMInferenceService) -> str:
7578
return f"http://{llmisvc.name}.{llmisvc.namespace}.svc.cluster.local"
7679

7780

81+
def _get_disconnected_inference_url(llmisvc: LLMInferenceService) -> str:
    """Resolve an externally reachable inference URL via the gateway Route.

    On disconnected clusters the gateway is exposed through ClusterIP rather
    than LoadBalancer, so the internal service URL reported in the LLMISVC
    status is not reachable from outside the cluster. The host of the gateway
    Route is used to construct the URL instead.

    Args:
        llmisvc: The LLMInferenceService whose inference endpoint is needed.

    Returns:
        An HTTPS URL of the form ``https://<route-host>/<namespace>/<name>``.

    Raises:
        RuntimeError: If the gateway Route does not exist on the cluster.
    """
    gateway_route = Route(
        client=llmisvc.client,
        name=LLMDGateway.DEFAULT_NAME,
        namespace=LLMDGateway.DEFAULT_NAMESPACE,
    )
    if gateway_route.exists:
        route_host = gateway_route.instance.spec.host
        return f"https://{route_host}/{llmisvc.namespace}/{llmisvc.name}"
    raise RuntimeError(
        f"Gateway Route {LLMDGateway.DEFAULT_NAME} not found in {LLMDGateway.DEFAULT_NAMESPACE}. "
        "Disconnected clusters require the gateway Route to be configured."
    )
100+
101+
78102
def _build_chat_body(model_name: str, prompt: str, max_tokens: int = 50) -> str:
79103
"""Build OpenAI chat completion request body."""
80104
return json.dumps({
@@ -163,7 +187,12 @@ def send_chat_completions(
163187
insecure: bool = True,
164188
) -> tuple[int, str]:
165189
"""Send a chat completion request. Returns (status_code, response_body)."""
166-
url = _get_inference_url(llmisvc) + "/v1/chat/completions"
190+
base_url = (
191+
_get_disconnected_inference_url(llmisvc)
192+
if is_disconnected_cluster(llmisvc.client)
193+
else _get_inference_url(llmisvc)
194+
)
195+
url = base_url + LLMEndpoint.CHAT_COMPLETIONS
167196
model_name = _get_model_name(llmisvc=llmisvc)
168197
body = _build_chat_body(model_name=model_name, prompt=prompt)
169198
ca_cert = None if insecure else _resolve_ca_cert(llmisvc.client)
@@ -314,7 +343,12 @@ def send_prefix_cache_requests(
314343
successful = 0
315344
for i in range(count):
316345
try:
317-
status, _ = send_chat_completions(llmisvc=llmisvc, prompt=prompt, token=token, insecure=False)
346+
status, _ = send_chat_completions(
347+
llmisvc=llmisvc,
348+
prompt=prompt,
349+
token=token,
350+
insecure=False,
351+
)
318352
if status == 200:
319353
successful += 1
320354
except Exception:

0 commit comments

Comments (0)