Remove unused LLMD inference helpers and their imports to fix the tox-tests failure

threcc · threcc · commit 7297695cbfee · 2026-03-10T17:13:10.000+01:00
diff --git a/utilities/llmd_utils.py b/utilities/llmd_utils.py
@@ -1,29 +1,21 @@
 """Utilities for LLM Deployment (LLMD) resources."""
 
-import json
-import re
-import shlex
 from collections.abc import Generator
 from contextlib import contextmanager
-from string import Template
 from typing import Any
 
 from kubernetes.dynamic import DynamicClient
 from ocp_resources.gateway import Gateway
 from ocp_resources.llm_inference_service import LLMInferenceService
-from pyhelper_utils.shell import run_command
 from simple_logger.logger import get_logger
-from timeout_sampler import TimeoutWatch, retry
+from timeout_sampler import TimeoutWatch
 
-from utilities.certificates_utils import get_ca_bundle
-from utilities.constants import HTTPRequest, Timeout
-from utilities.exceptions import InferenceResponseError
+from utilities.constants import Timeout
 from utilities.infra import get_services_by_isvc_label
 from utilities.llmd_constants import (
     ContainerImages,
     KServeGateway,
     LLMDGateway,
-    LLMEndpoint,
 )
 
 LOGGER = get_logger(name=__name__)
@@ -396,340 +388,3 @@ def get_llm_inference_url(llm_service: LLMInferenceService) -> str:
     fallback_url = f"http://{llm_service.name}.{llm_service.namespace}.svc.cluster.local"
     LOGGER.debug(f"Using fallback URL for {llm_service.name}: {fallback_url}")
     return fallback_url
-
-
-def verify_inference_response_llmd(
-    llm_service: LLMInferenceService,
-    inference_config: dict[str, Any],
-    inference_type: str,
-    protocol: str,
-    model_name: str | None = None,
-    inference_input: Any | None = None,
-    use_default_query: bool = False,
-    expected_response_text: str | None = None,
-    insecure: bool = False,
-    token: str | None = None,
-    authorized_user: bool | None = None,
-) -> None:
-    """
-    Verify the LLM inference response following the pattern of verify_inference_response.
-
-    Args:
-        llm_service: LLMInferenceService resource to test
-        inference_config: Inference configuration dictionary
-        inference_type: Type of inference ('infer', 'streaming', etc.)
-        protocol: Protocol to use ('http', 'grpc')
-        model_name: Name of the model (defaults to service name)
-        inference_input: Input for inference (optional)
-        use_default_query: Whether to use default query from config
-        expected_response_text: Expected response text for validation
-        insecure: Whether to use insecure connections
-        token: Authentication token (optional)
-        authorized_user: Whether user should be authorized (optional)
-
-    Raises:
-        InferenceResponseError: If inference response is invalid
-        ValueError: If inference response validation fails
-    """
-
-    model_name = model_name or llm_service.name
-    inference = LLMUserInference(
-        llm_service=llm_service,
-        inference_config=inference_config,
-        inference_type=inference_type,
-        protocol=protocol,
-    )
-
-    res = inference.run_inference_flow(
-        model_name=model_name,
-        inference_input=inference_input,
-        use_default_query=use_default_query,
-        token=token,
-        insecure=insecure,
-    )
-
-    if authorized_user is False:
-        _validate_unauthorized_response(res=res, token=token, inference=inference)
-    else:
-        _validate_authorized_response(
-            res=res,
-            inference=inference,
-            inference_config=inference_config,
-            inference_type=inference_type,
-            expected_response_text=expected_response_text,
-            use_default_query=use_default_query,
-            model_name=model_name,
-        )
-
-
-class LLMUserInference:
-    """
-    LLM-specific inference handler following the pattern of UserInference.
-    """
-
-    STREAMING = "streaming"
-    INFER = "infer"
-
-    def __init__(
-        self,
-        llm_service: LLMInferenceService,
-        inference_config: dict[str, Any],
-        inference_type: str,
-        protocol: str,
-    ) -> None:
-        self.llm_service = llm_service
-        self.inference_config = inference_config
-        self.inference_type = inference_type
-        self.protocol = protocol
-        self.runtime_config = self.get_runtime_config()
-
-    def get_runtime_config(self) -> dict[str, Any]:
-        """Get runtime config from inference config based on inference type and protocol."""
-        if inference_type_config := self.inference_config.get(self.inference_type):
-            protocol = "http" if self.protocol.lower() in ["http", "https"] else self.protocol
-            if data := inference_type_config.get(protocol):
-                return data
-            else:
-                raise ValueError(f"Protocol {protocol} not supported for inference type {self.inference_type}")
-        else:
-            raise ValueError(f"Inference type {self.inference_type} not supported in config")
-
-    @property
-    def inference_response_text_key_name(self) -> str | None:
-        """Get inference response text key name from runtime config."""
-        return self.runtime_config.get("response_fields_map", {}).get("response_output")
-
-    @property
-    def inference_response_key_name(self) -> str:
-        """Get inference response key name from runtime config."""
-        return self.runtime_config.get("response_fields_map", {}).get("response", "output")
-
-    def get_inference_body(
-        self,
-        model_name: str,
-        inference_input: Any | None = None,
-        use_default_query: bool = False,
-    ) -> str:
-        """Get inference body for LLM request."""
-        if not use_default_query and inference_input is None:
-            raise ValueError("Either pass `inference_input` or set `use_default_query` to True")
-
-        if use_default_query:
-            default_query_config = self.inference_config.get("default_query_model")
-            if not default_query_config:
-                raise ValueError(f"Missing default query config for {model_name}")
-
-            if self.inference_config.get("support_multi_default_queries"):
-                query_config = default_query_config.get(self.inference_type)
-                if not query_config:
-                    raise ValueError(f"Missing default query for inference type {self.inference_type}")
-                query_input = query_config.get("query_input", "")
-            else:
-                query_input = default_query_config.get("query_input", "")
-
-            # Use the proper JSON body template from runtime config
-            body_template = self.runtime_config.get("body", "")
-            if body_template:
-                # Use template substitution for both model name and query input
-                template = Template(template=body_template)
-                body = template.safe_substitute(model_name=model_name, query_input=query_input)
-            else:
-                # Fallback to plain text (legacy behavior)
-                template = Template(template=query_input)
-                body = template.safe_substitute(model_name=model_name)
-        else:
-            # For custom input, create OpenAI-compatible format
-            if isinstance(inference_input, str):
-                body = json.dumps({
-                    "model": model_name,
-                    "messages": [{"role": "user", "content": inference_input}],
-                    "max_tokens": 100,
-                    "temperature": 0.0,
-                })
-            else:
-                body = json.dumps(inference_input)
-
-        return body
-
-    def generate_command(
-        self,
-        model_name: str,
-        inference_input: str | None = None,
-        use_default_query: bool = False,
-        insecure: bool = False,
-        token: str | None = None,
-    ) -> str:
-        """Generate curl command string for LLM inference."""
-        base_url = get_llm_inference_url(llm_service=self.llm_service)
-        endpoint_url = f"{base_url}{LLMEndpoint.CHAT_COMPLETIONS}"
-
-        body = self.get_inference_body(
-            model_name=model_name,
-            inference_input=inference_input,
-            use_default_query=use_default_query,
-        )
-
-        header = HTTPRequest.CONTENT_JSON.replace("-H ", "")
-        cmd_exec = "curl -i -s"
-        cmd = f"{cmd_exec} -X POST -d '{body}' -H {header} -H 'Accept: application/json'"
-
-        if token:
-            cmd += f" {HTTPRequest.AUTH_HEADER.format(token=token)}"
-
-        if insecure:
-            cmd += " --insecure"
-        else:
-            try:
-                from ocp_resources.resource import get_client
-
-                client = get_client()
-                ca_bundle = get_ca_bundle(client=client)
-                if ca_bundle:
-                    cmd += f" --cacert {ca_bundle}"
-                else:
-                    cmd += " --insecure"
-            except Exception:  # noqa: BLE001
-                cmd += " --insecure"
-
-        cmd += f" --max-time {LLMEndpoint.DEFAULT_TIMEOUT} {endpoint_url}"
-        return cmd
-
-    @retry(wait_timeout=Timeout.TIMEOUT_30SEC, sleep=5)
-    def run_inference(
-        self,
-        model_name: str,
-        inference_input: str | None = None,
-        use_default_query: bool = False,
-        insecure: bool = False,
-        token: str | None = None,
-    ) -> str:
-        """Run inference command and return raw output."""
-        cmd = self.generate_command(
-            model_name=model_name,
-            inference_input=inference_input,
-            use_default_query=use_default_query,
-            insecure=insecure,
-            token=token,
-        )
-
-        res, out, err = run_command(command=shlex.split(cmd), verify_stderr=False, check=False)
-        if res:
-            return out
-        raise ValueError(f"Inference failed with error: {err}\nOutput: {out}\nCommand: {cmd}")
-
-    def run_inference_flow(
-        self,
-        model_name: str,
-        inference_input: str | None = None,
-        use_default_query: bool = False,
-        insecure: bool = False,
-        token: str | None = None,
-    ) -> dict[str, Any]:
-        """Run LLM inference using the same high-level flow as inference_utils."""
-        out = self.run_inference(
-            model_name=model_name,
-            inference_input=inference_input,
-            use_default_query=use_default_query,
-            insecure=insecure,
-            token=token,
-        )
-        return {"output": out}
-
-
-def _validate_unauthorized_response(res: dict[str, Any], token: str | None, inference: LLMUserInference) -> None:
-    """Validate response for unauthorized users."""
-    auth_header = "x-ext-auth-reason"
-
-    if auth_reason := re.search(rf"{auth_header}: (.*)", res["output"], re.MULTILINE):
-        reason = auth_reason.group(1).lower()
-
-        if token:
-            assert re.search(r"not (?:authenticated|authorized)", reason)
-        else:
-            assert "credential not found" in reason
-    else:
-        forbidden_patterns = ["Forbidden", "401", "403", "Unauthorized"]
-        output = res["output"]
-
-        if any(pattern in output for pattern in forbidden_patterns):
-            return
-
-        raise ValueError(f"Auth header {auth_header} not found in response. Response: {output}")
-
-
-def _validate_authorized_response(
-    res: dict[str, Any],
-    inference: LLMUserInference,
-    inference_config: dict[str, Any],
-    inference_type: str,
-    expected_response_text: str | None,
-    use_default_query: bool,
-    model_name: str,
-) -> None:
-    """Validate response for authorized users."""
-
-    use_regex = False
-
-    if use_default_query:
-        expected_response_text_config = inference_config.get("default_query_model", {})
-        use_regex = expected_response_text_config.get("use_regex", False)
-
-        if not expected_response_text_config:
-            raise ValueError(f"Missing default_query_model config for inference {inference_config}")
-
-        if inference_config.get("support_multi_default_queries"):
-            query_config = expected_response_text_config.get(inference_type)
-            if not query_config:
-                raise ValueError(f"Missing default_query_model config for inference type {inference_type}")
-            expected_response_text = query_config.get("query_output", "")
-            use_regex = query_config.get("use_regex", False)
-        else:
-            expected_response_text = expected_response_text_config.get("query_output")
-
-        if not expected_response_text:
-            raise ValueError(f"Missing response text key for inference {inference_config}")
-
-        if isinstance(expected_response_text, str):
-            expected_response_text = Template(template=expected_response_text).safe_substitute(model_name=model_name)
-        elif isinstance(expected_response_text, dict):
-            response_output = expected_response_text.get("response_output")
-            if response_output is not None:
-                expected_response_text = Template(template=response_output).safe_substitute(model_name=model_name)
-    if inference.inference_response_text_key_name:
-        if inference_type == inference.STREAMING:
-            if output := re.findall(
-                rf"{inference.inference_response_text_key_name}\": \"(.*)\"",
-                res[inference.inference_response_key_name],
-                re.MULTILINE,
-            ):
-                assert "".join(output) == expected_response_text, (
-                    f"Expected: {expected_response_text} does not match response: {output}"
-                )
-        elif inference_type == inference.INFER or use_regex:
-            formatted_res = json.dumps(res[inference.inference_response_text_key_name]).replace(" ", "")
-            if use_regex and expected_response_text is not None:
-                assert re.search(expected_response_text, formatted_res), (
-                    f"Expected: {expected_response_text} not found in: {formatted_res}"
-                )
-            else:
-                formatted_res = json.dumps(res[inference.inference_response_key_name]).replace(" ", "")
-                assert formatted_res == expected_response_text, (
-                    f"Expected: {expected_response_text} does not match output: {formatted_res}"
-                )
-        else:
-            response = res[inference.inference_response_key_name]
-            if isinstance(response, list):
-                response = response[0]
-
-            if isinstance(response, dict):
-                response_text = response[inference.inference_response_text_key_name]
-                assert response_text == expected_response_text, (
-                    f"Expected: {expected_response_text} does not match response: {response_text}"
-                )
-            else:
-                raise InferenceResponseError(
-                    "Inference response output does not match expected output format."
-                    f"Expected: {expected_response_text}.\nResponse: {res}"
-                )
-    else:
-        raise InferenceResponseError(f"Inference response output not found in response. Response: {res}")