Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@
indirect=True,
)
class TestKserveTokenAuthenticationRawForRest:
"""Validate KServe raw deployment token-based authentication for REST inference.

Steps:
1. Deploy an OVMS model with authentication enabled in a raw deployment namespace.
2. Query the model with a valid token and verify a successful REST inference response.
3. Disable authentication and verify the model is still queryable without a token.
4. Re-enable authentication and verify the model requires a valid token again.
5. Attempt cross-model authentication using another model's token and verify access is denied.
"""

@pytest.mark.smoke
@pytest.mark.ocp_interop
@pytest.mark.dependency(name="test_model_authentication_using_rest_raw")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@
@pytest.mark.smoke
@pytest.mark.rawdeployment
class TestRawUnprivilegedUser:
"""Validate that a non-admin user can deploy and query a KServe raw deployment model.

Steps:
1. Create a namespace with unprivileged user credentials.
2. Deploy an OVMS model as a raw deployment using the non-admin user.
3. Query the deployed model via REST and verify a successful inference response.
"""

def test_non_admin_deploy_raw_and_query_model(
self,
unprivileged_s3_ovms_raw_inference_service,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,22 @@
indirect=True,
)
class TestInferenceGraphRaw:
"""Validate KServe InferenceGraph functionality in raw deployment mode.

Steps:
1. Deploy an OVMS serving runtime with ONNX support in the test namespace.
2. Create inference graphs with various configurations (public, private, auth-enabled).
3. Send inference requests through each graph and verify correct routing and responses.
4. Verify authentication enforcement by testing with privileged and unprivileged tokens.
"""

@pytest.mark.parametrize(
"dog_breed_inference_graph",
[{"name": "dog-breed-raw-pipeline", "deployment-mode": KServeDeploymentType.RAW_DEPLOYMENT}],
indirect=True,
)
def test_inference_graph_raw_deployment(self, dog_breed_inference_graph):
"""Verify inference through a public raw deployment inference graph."""
verify_inference_response(
inference_service=dog_breed_inference_graph,
inference_config=ONNX_INFERENCE_CONFIG,
Expand All @@ -46,6 +56,7 @@ def test_inference_graph_raw_deployment(self, dog_breed_inference_graph):
indirect=True,
)
def test_private_inference_graph_raw_deployment(self, dog_breed_inference_graph):
"""Verify inference through a private (no external route) raw deployment inference graph."""
verify_inference_response(
inference_service=dog_breed_inference_graph,
inference_config=ONNX_INFERENCE_CONFIG,
Expand All @@ -69,6 +80,7 @@ def test_private_inference_graph_raw_deployment(self, dog_breed_inference_graph)
indirect=True,
)
def test_inference_graph_raw_authentication(self, dog_breed_inference_graph, inference_graph_sa_token_with_access):
"""Verify inference through an auth-enabled inference graph using an authorized service account token."""
verify_inference_response(
inference_service=dog_breed_inference_graph,
inference_config=ONNX_INFERENCE_CONFIG,
Expand All @@ -94,6 +106,7 @@ def test_inference_graph_raw_authentication(self, dog_breed_inference_graph, inf
def test_private_inference_graph_raw_authentication(
self, dog_breed_inference_graph, inference_graph_sa_token_with_access
):
"""Verify inference through a private auth-enabled inference graph using an authorized token."""
verify_inference_response(
inference_service=dog_breed_inference_graph,
inference_config=ONNX_INFERENCE_CONFIG,
Expand All @@ -119,6 +132,7 @@ def test_private_inference_graph_raw_authentication(
def test_inference_graph_raw_authentication_without_privileges(
self, dog_breed_inference_graph, inference_graph_unprivileged_sa_token
):
"""Verify that an unprivileged token is denied access to an auth-enabled inference graph."""
verify_inference_response(
inference_service=dog_breed_inference_graph,
inference_config=ONNX_INFERENCE_CONFIG,
Expand All @@ -145,6 +159,7 @@ def test_inference_graph_raw_authentication_without_privileges(
def test_private_inference_graph_raw_authentication_without_privileges(
self, dog_breed_inference_graph, inference_graph_unprivileged_sa_token
):
"""Verify that an unprivileged token is denied access to a private auth-enabled inference graph."""
verify_inference_response(
inference_service=dog_breed_inference_graph,
inference_config=ONNX_INFERENCE_CONFIG,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@
indirect=True,
)
class TestRawISVCEnvVarsUpdates:
"""Validate adding and removing environment variables on a KServe raw deployment ISVC.

Steps:
1. Deploy an OVMS inference service with custom environment variables.
2. Verify the environment variables are present in the predictor pods.
3. Remove the environment variables from the inference service.
4. Verify the environment variables are no longer present in the predictor pods.
"""

def test_raw_with_isvc_env_vars(self, ovms_kserve_inference_service):
"""Test adding environment variables to the inference service"""
verify_env_vars_in_isvc_pods(isvc=ovms_kserve_inference_service, env_vars=ISVC_ENV_VARS, vars_exist=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@
indirect=True,
)
class TestISVCPullSecretUpdate:
"""Validate pull secret lifecycle operations on a KServe model car inference service.

Steps:
1. Deploy a model car ISVC with an initial pull secret attached.
2. Verify the initial pull secret is correctly set in the predictor pod.
3. Update the pull secret to a new value and verify it is reflected in the new pod.
4. Remove the pull secret and verify it is no longer present in the pod.
"""

@pytest.mark.tier1
def test_initial_pull_secret_set(self, model_car_raw_inference_service_with_pull_secret):
"""Ensure initial pull secret is correctly set in the pod"""
Expand All @@ -36,4 +45,5 @@ def test_update_pull_secret(self, updated_isvc_pull_secret):
verify_pull_secret(isvc=updated_isvc_pull_secret, pull_secret=UPDATED_PULL_SECRET, secret_exists=True)

def test_remove_pull_secret(self, updated_isvc_remove_pull_secret):
"""Remove the pull secret and verify it is no longer present in the pod."""
verify_pull_secret(isvc=updated_isvc_remove_pull_secret, pull_secret=UPDATED_PULL_SECRET, secret_exists=False)
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@
indirect=True,
)
class TestRawISVCReplicasUpdates:
"""Validate scaling replica count up and down on a KServe raw deployment ISVC.

Steps:
1. Deploy an OVMS inference service with 2 min-replicas and 4 max-replicas.
2. Verify that 2 predictor pods are running after deployment.
3. Run inference to confirm the model responds correctly with multiple replicas.
4. Patch the ISVC to scale down to 1 replica and verify only 1 pod remains.
5. Run inference again to confirm the model responds correctly after scale-down.
"""
Comment on lines +42 to +50
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Docstring inaccurately describes the test flow.

The docstring implies scaling up (2→4) and down (4→1), but the tests only verify the initial deployment of 2 replicas and then scale down to 1. There's no actual scale-up operation tested. The class is named TestRawISVCReplicasUpdates and the first test is named test_raw_increase_isvc_replicas, but it only asserts the initial replica count rather than testing an increase operation.

📝 Proposed correction
-    """Validate scaling replica count up and down on a KServe raw deployment ISVC.
+    """Validate replica scaling behavior on a KServe raw deployment ISVC.
 
     Steps:
         1. Deploy an OVMS inference service with 2 min-replicas and 4 max-replicas.
         2. Verify that 2 predictor pods are running after deployment.
         3. Run inference to confirm the model responds correctly with multiple replicas.
         4. Patch the ISVC to scale down to 1 replica and verify only 1 pod remains.
         5. Run inference again to confirm the model responds correctly after scale-down.
     """
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@tests/model_serving/model_server/kserve/inference_service_lifecycle/test_isvc_replicas_update.py`
around lines 42 - 50, The docstring and test name claim an increase operation
but the code only verifies initial 2 replicas then scales down to 1; update the
documentation and test name to match behavior: change the module/class docstring
to describe "deploy with 2 min-replicas, verify 2 pods, run inference, patch
ISVC to scale down to 1 and verify 1 pod and inference" and rename the test
function test_raw_increase_isvc_replicas to test_raw_decrease_isvc_replicas (or
similar) so the name reflects the actual assertion; locate references to
TestRawISVCReplicasUpdates and test_raw_increase_isvc_replicas in the file to
update them consistently.


@pytest.mark.dependency(name="test_raw_increase_isvc_replicas")
def test_raw_increase_isvc_replicas(self, isvc_pods, ovms_kserve_inference_service):
"""Test replicas increase"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@
indirect=True,
)
class TestStopRaw:
"""Validate stopping a running KServe raw deployment model via the stop annotation.

Steps:
1. Deploy an OVMS ONNX model as a raw deployment with stop set to false.
2. Verify the model can be queried via REST.
3. Patch the ISVC stop annotation to true.
4. Verify all predictor pods are deleted and remain absent.
"""

def test_raw_onnx_rest_inference(
self, unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_raw_inference_service
):
Expand All @@ -58,8 +67,7 @@ def test_stop_and_update_to_true_delete_pod_rollout(
ovms_raw_inference_service,
patched_raw_inference_service_stop_annotation,
):
"""Verify pod rollout is deleted when the stop annotation updated to true"""
"""Verify pods do not exist"""
"""Verify pod rollout is deleted when the stop annotation is updated to true."""
result = consistently_verify_no_pods_exist(
client=unprivileged_client,
isvc=patched_raw_inference_service_stop_annotation,
Expand All @@ -86,15 +94,23 @@ def test_stop_and_update_to_true_delete_pod_rollout(
indirect=True,
)
class TestStoppedResumeRaw:
"""Validate resuming a stopped KServe raw deployment model by setting the stop annotation to false.

Steps:
1. Deploy an OVMS ONNX model as a raw deployment with stop set to true.
2. Verify no predictor pods are created while the stop annotation is true.
3. Patch the ISVC stop annotation to false.
4. Verify predictor pods are rolled out and the model can be queried via REST.
"""

def test_stop_and_true_no_pod_rollout(
self,
unprivileged_client,
unprivileged_model_namespace,
ovms_kserve_serving_runtime,
ovms_raw_inference_service,
):
"""Verify no pod rollout when the stop annotation is true"""
"""Verify pods do not exist"""
"""Verify no pod rollout when the stop annotation is true."""
result = consistently_verify_no_pods_exist(
client=unprivileged_client,
isvc=ovms_raw_inference_service,
Expand All @@ -114,8 +130,7 @@ def test_stop_and_update_to_false_pod_rollout(
ovms_raw_inference_service,
patched_raw_inference_service_stop_annotation,
):
"""Verify pod rollout when the stop annotation is updated to false"""
"""Verify that kserve Raw ONNX model can be queried using REST"""
"""Verify pod rollout and REST inference after the stop annotation is set to false."""
verify_inference_response(
inference_service=patched_raw_inference_service_stop_annotation,
inference_config=ONNX_INFERENCE_CONFIG,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ def test_raw_onnx_rout_reconciliation(self, ovms_raw_inference_service):
)

def test_route_value_before_and_after_deletion(self, unprivileged_client, ovms_raw_inference_service):
# Validate ingress status before and after route deletion
"""Verify that the ingress status changes after the route is deleted and recreated."""
assert_ingress_status_changed(client=unprivileged_client, inference_service=ovms_raw_inference_service)

def test_model_works_after_route_is_recreated(self, ovms_raw_inference_service):
# Final inference validation after route update
"""Verify that the model is still queryable via REST after the route is recreated."""
verify_inference_response(
inference_service=ovms_raw_inference_service,
inference_config=ONNX_INFERENCE_CONFIG,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@
indirect=True,
)
class TestRestRawDeploymentRoutes:
"""Validate REST route visibility transitions for KServe raw deployment.

Steps:
1. Deploy a Caikit-TGIS model as a raw deployment with HTTP enabled.
2. Verify the default route visibility label is not set.
3. Query the model via the internal route and confirm a successful response.
4. Patch the ISVC to expose the route externally and verify inference over HTTPS.
5. Revert the route to local-cluster visibility and verify external access is disabled.
"""

def test_default_visibility_value(self, s3_models_inference_service):
"""Test default route visibility value"""
if labels := s3_models_inference_service.labels:
Expand Down Expand Up @@ -126,6 +136,15 @@ def test_disabled_rest_raw_deployment_exposed_route(self, patched_s3_caikit_kser
indirect=True,
)
class TestRestRawDeploymentRoutesTimeout:
"""Validate REST route timeout behavior for KServe raw deployment.

Steps:
1. Deploy a Caikit-TGIS model as a raw deployment with an external route.
2. Verify inference succeeds over the exposed HTTPS route.
3. Patch the route with an extremely low timeout annotation.
4. Verify inference fails with a 504 Gateway Time-out error.
"""

@pytest.mark.dependency(name="test_rest_raw_deployment_exposed_route")
def test_rest_raw_deployment_exposed_route(self, s3_models_inference_service):
"""Test HTTP inference using exposed (external) route"""
Expand Down Expand Up @@ -191,6 +210,14 @@ def test_rest_raw_deployment_exposed_route_with_timeout(self, s3_models_inferenc
)
@pytest.mark.skip(reason="skipping grpc raw for tgis-caikit")
class TestGrpcRawDeployment:
"""Validate gRPC route visibility transitions for KServe raw deployment.

Steps:
1. Deploy a Caikit-TGIS model as a raw deployment with gRPC enabled.
2. Query the model via the internal gRPC route and confirm a successful streaming response.
3. Patch the ISVC to expose the route externally and verify gRPC inference over the exposed route.
"""

def test_grpc_raw_deployment_internal_route(self, s3_models_inference_service):
"""Test GRPC inference using internal route"""
verify_inference_response(
Expand Down Expand Up @@ -247,6 +274,15 @@ def test_grpc_raw_deployment_exposed_route(self, patched_s3_caikit_kserve_isvc_v
)
@pytest.mark.skip(reason="skipping grpc raw for tgis-caikit")
class TestGrpcRawDeploymentTimeout:
"""Validate gRPC route timeout behavior for KServe raw deployment.

Steps:
1. Deploy a Caikit-TGIS model as a raw deployment with an external gRPC route.
2. Verify gRPC inference succeeds over the exposed route.
3. Patch the route with an extremely low timeout annotation.
4. Verify gRPC inference fails with a 504 Gateway Time-out error.
"""

@pytest.mark.dependency(name="test_grpc_raw_deployment_exposed_route")
def test_grpc_raw_deployment_exposed_route(self, s3_models_inference_service):
"""Test GRPC inference using exposed (external) route"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,19 @@
indirect=True,
)
class TestMultiNode:
"""Validate multi-node GPU inference with Ray-based vLLM serving on KServe.

Steps:
1. Deploy a Granite-8B model on a multi-node vLLM inference service backed by PVC storage.
2. Verify Ray cluster health and NVIDIA GPU status across head and worker pods.
3. Validate default runtime worker spec (tensorParallelSize=1, pipelineParallelSize=2).
4. Confirm pods are distributed across GPU nodes and TLS certificates are provisioned.
5. Test pod resilience by deleting head and worker pods and verifying inference recovery.
6. Validate TLS secret reconciliation, runtime deletion cleanup, and runtime re-creation.
7. Test external route inference and dynamic updates to tensor/pipeline parallel sizes.
8. Verify model args propagation to and removal from the vLLM command spec.
"""

def test_multi_node_ray_status(self, multi_node_predictor_pods_scope_class):
"""Test multi node ray status"""
verify_ray_status(pods=multi_node_predictor_pods_scope_class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@
indirect=True,
)
class TestOciMultiNode:
"""Validate multi-node GPU inference using OCI-based model storage on KServe.

Steps:
1. Deploy a multi-node vLLM inference service using an OCI model image.
2. Send an inference request over the external HTTPS route.
3. Verify the model returns a successful completion response.
"""

def test_oci_multi_node_basic_external_inference(self, multi_node_oci_inference_service):
"""Test multi node basic inference"""
verify_inference_response(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@
indirect=True,
)
class TestModelMetrics:
"""Validate that OVMS model metrics are reported through OpenShift UserWorkloadMonitoring.

Steps:
1. Deploy an OVMS model car inference service with metrics enabled.
2. Send a single inference request and verify the success counter increments to 1.
3. Send multiple inference requests in parallel and verify the total success count.
4. Query Prometheus for CPU utilization metrics of the model namespace pods.
"""

@pytest.mark.tier1
def test_model_metrics_num_success_requests(self, model_car_inference_service, prometheus):
"""Verify number of successful model requests in OpenShift monitoring system (UserWorkloadMonitoring) metrics"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@
@pytest.mark.tier1
@pytest.mark.rawdeployment
class TestRawUnprivilegedUserMetrics:
"""Validate that an unprivileged user can access model metrics via UserWorkloadMonitoring.

Steps:
1. Deploy an OVMS model car inference service as a non-admin user with metrics enabled.
2. Send multiple inference requests to the model.
3. Query Prometheus and verify the success request count matches expectations.
"""

@pytest.mark.metrics
def test_non_admin_raw_metrics(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@
indirect=True,
)
class TestKServeDSCRawDefaultDeploymentMode:
"""Validate that the DSC default deployment mode propagates to inference services.

Steps:
1. Set the DSC default deployment mode to RawDeployment.
2. Deploy an OVMS inference service without explicitly specifying a deployment mode.
3. Verify the ISVC annotation reflects RawDeployment as the deployment mode.
4. Send an inference request and verify a successful response over HTTPS.
"""

def test_isvc_contains_raw_deployment_mode(self, default_deployment_mode_in_dsc, ovms_inference_service):
"""Verify that default deployment mode is set to raw in inference service."""
assert (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,15 @@ def wait_for_isvc_model_status(isvc: InferenceService, target_model_state: str,
indirect=True,
)
class TestInferenceServiceCustomResources:
"""Validate InferenceService status transitions when the model storage path is invalid and then corrected.

Steps:
1. Deploy an ISVC with a non-existing S3 model path.
2. Verify the model status transitions to FailedToLoad / BlockedByFailedLoad.
3. Update the ISVC with a valid S3 model path.
4. Verify the model status transitions to Loaded / UpToDate.
"""

@pytest.mark.dependency(name="test_isvc_with_invalid_models_s3_path")
def test_isvc_with_invalid_models_s3_path(self, invalid_s3_models_inference_service):
"""Test ISVC status with invalid models storage path"""
Expand Down
Loading
Loading