opendatahub-io · mwaykole · Mar 25, 2026 · Mar 25, 2026 · coderabbitai · Mar 25, 2026
@@ -21,6 +21,16 @@
     indirect=True,
 )
 class TestKserveTokenAuthenticationRawForRest:
+    """Validate KServe raw deployment token-based authentication for REST inference.
+
+    Steps:
+        1. Deploy an OVMS model with authentication enabled in a raw deployment namespace.
+        2. Query the model with a valid token and verify a successful REST inference response.
+        3. Disable authentication and verify the model is still queryable without a token.
+        4. Re-enable authentication and verify the model requires a valid token again.
+        5. Attempt cross-model authentication using another model's token and verify access is denied.
+    """
+
     @pytest.mark.smoke
     @pytest.mark.ocp_interop
     @pytest.mark.dependency(name="test_model_authentication_using_rest_raw")

@@ -21,6 +21,14 @@
 @pytest.mark.smoke
 @pytest.mark.rawdeployment
 class TestRawUnprivilegedUser:
+    """Validate that a non-admin user can deploy and query a KServe raw deployment model.
+
+    Steps:
+        1. Create a namespace with unprivileged user credentials.
+        2. Deploy an OVMS model as a raw deployment using the non-admin user.
+        3. Query the deployed model via REST and verify a successful inference response.
+    """
+
     def test_non_admin_deploy_raw_and_query_model(
         self,
         unprivileged_s3_ovms_raw_inference_service,

@@ -19,12 +19,22 @@
     indirect=True,
 )
 class TestInferenceGraphRaw:
+    """Validate KServe InferenceGraph functionality in raw deployment mode.
+
+    Steps:
+        1. Deploy an OVMS serving runtime with ONNX support in the test namespace.
+        2. Create inference graphs with various configurations (public, private, auth-enabled).
+        3. Send inference requests through each graph and verify correct routing and responses.
+        4. Verify authentication enforcement by testing with privileged and unprivileged tokens.
+    """
+
     @pytest.mark.parametrize(
         "dog_breed_inference_graph",
         [{"name": "dog-breed-raw-pipeline", "deployment-mode": KServeDeploymentType.RAW_DEPLOYMENT}],
         indirect=True,
     )
     def test_inference_graph_raw_deployment(self, dog_breed_inference_graph):
+        """Verify inference through a public raw deployment inference graph."""
         verify_inference_response(
             inference_service=dog_breed_inference_graph,
             inference_config=ONNX_INFERENCE_CONFIG,
@@ -46,6 +56,7 @@ def test_inference_graph_raw_deployment(self, dog_breed_inference_graph):
         indirect=True,
     )
     def test_private_inference_graph_raw_deployment(self, dog_breed_inference_graph):
+        """Verify inference through a private (no external route) raw deployment inference graph."""
         verify_inference_response(
             inference_service=dog_breed_inference_graph,
             inference_config=ONNX_INFERENCE_CONFIG,
@@ -69,6 +80,7 @@ def test_private_inference_graph_raw_deployment(self, dog_breed_inference_graph)
         indirect=True,
     )
     def test_inference_graph_raw_authentication(self, dog_breed_inference_graph, inference_graph_sa_token_with_access):
+        """Verify inference through an auth-enabled inference graph using an authorized service account token."""
         verify_inference_response(
             inference_service=dog_breed_inference_graph,
             inference_config=ONNX_INFERENCE_CONFIG,
@@ -94,6 +106,7 @@ def test_inference_graph_raw_authentication(self, dog_breed_inference_graph, inf
     def test_private_inference_graph_raw_authentication(
         self, dog_breed_inference_graph, inference_graph_sa_token_with_access
     ):
+        """Verify inference through a private auth-enabled inference graph using an authorized token."""
         verify_inference_response(
             inference_service=dog_breed_inference_graph,
             inference_config=ONNX_INFERENCE_CONFIG,
@@ -119,6 +132,7 @@ def test_private_inference_graph_raw_authentication(
     def test_inference_graph_raw_authentication_without_privileges(
         self, dog_breed_inference_graph, inference_graph_unprivileged_sa_token
     ):
+        """Verify that an unprivileged token is denied access to an auth-enabled inference graph."""
         verify_inference_response(
             inference_service=dog_breed_inference_graph,
             inference_config=ONNX_INFERENCE_CONFIG,
@@ -145,6 +159,7 @@ def test_inference_graph_raw_authentication_without_privileges(
     def test_private_inference_graph_raw_authentication_without_privileges(
         self, dog_breed_inference_graph, inference_graph_unprivileged_sa_token
     ):
+        """Verify that an unprivileged token is denied access to a private auth-enabled inference graph."""
         verify_inference_response(
             inference_service=dog_breed_inference_graph,
             inference_config=ONNX_INFERENCE_CONFIG,

@@ -43,6 +43,15 @@
     indirect=True,
 )
 class TestRawISVCEnvVarsUpdates:
+    """Validate adding and removing environment variables on a KServe raw deployment ISVC.
+
+    Steps:
+        1. Deploy an OVMS inference service with custom environment variables.
+        2. Verify the environment variables are present in the predictor pods.
+        3. Remove the environment variables from the inference service.
+        4. Verify the environment variables are no longer present in the predictor pods.
+    """
+
     def test_raw_with_isvc_env_vars(self, ovms_kserve_inference_service):
         """Test adding environment variables to the inference service"""
         verify_env_vars_in_isvc_pods(isvc=ovms_kserve_inference_service, env_vars=ISVC_ENV_VARS, vars_exist=True)

@@ -24,6 +24,15 @@
     indirect=True,
 )
 class TestISVCPullSecretUpdate:
+    """Validate pull secret lifecycle operations on a KServe model car inference service.
+
+    Steps:
+        1. Deploy a model car ISVC with an initial pull secret attached.
+        2. Verify the initial pull secret is correctly set in the predictor pod.
+        3. Update the pull secret to a new value and verify it is reflected in the new pod.
+        4. Remove the pull secret and verify it is no longer present in the pod.
+    """
+
     @pytest.mark.tier1
     def test_initial_pull_secret_set(self, model_car_raw_inference_service_with_pull_secret):
         """Ensure initial pull secret is correctly set in the pod"""
@@ -36,4 +45,5 @@ def test_update_pull_secret(self, updated_isvc_pull_secret):
         verify_pull_secret(isvc=updated_isvc_pull_secret, pull_secret=UPDATED_PULL_SECRET, secret_exists=True)
 
     def test_remove_pull_secret(self, updated_isvc_remove_pull_secret):
+        """Remove the pull secret and verify it is no longer present in the pod."""
         verify_pull_secret(isvc=updated_isvc_remove_pull_secret, pull_secret=UPDATED_PULL_SECRET, secret_exists=False)
@@ -39,6 +39,16 @@
     indirect=True,
 )
 class TestRawISVCReplicasUpdates:
+    """Validate scaling replica count up and down on a KServe raw deployment ISVC.
+
+    Steps:
+        1. Deploy an OVMS inference service with 2 min-replicas and 4 max-replicas.
+        2. Verify that 2 predictor pods are running after deployment.
+        3. Run inference to confirm the model responds correctly with multiple replicas.
+        4. Patch the ISVC to scale down to 1 replica and verify only 1 pod remains.
+        5. Run inference again to confirm the model responds correctly after scale-down.
+    """
+
     @pytest.mark.dependency(name="test_raw_increase_isvc_replicas")
     def test_raw_increase_isvc_replicas(self, isvc_pods, ovms_kserve_inference_service):
         """Test replicas increase"""

@@ -33,6 +33,15 @@
     indirect=True,
 )
 class TestStopRaw:
+    """Validate stopping a running KServe raw deployment model via the stop annotation.
+
+    Steps:
+        1. Deploy an OVMS ONNX model as a raw deployment with stop set to false.
+        2. Verify the model can be queried via REST.
+        3. Patch the ISVC stop annotation to true.
+        4. Verify all predictor pods are deleted and remain absent.
+    """
+
     def test_raw_onnx_rest_inference(
         self, unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_raw_inference_service
     ):
@@ -58,8 +67,7 @@ def test_stop_and_update_to_true_delete_pod_rollout(
         ovms_raw_inference_service,
         patched_raw_inference_service_stop_annotation,
     ):
-        """Verify pod rollout is deleted when the stop annotation updated to true"""
-        """Verify pods do not exist"""
+        """Verify pod rollout is deleted when the stop annotation is updated to true."""
         result = consistently_verify_no_pods_exist(
             client=unprivileged_client,
             isvc=patched_raw_inference_service_stop_annotation,
@@ -86,15 +94,23 @@ def test_stop_and_update_to_true_delete_pod_rollout(
     indirect=True,
 )
 class TestStoppedResumeRaw:
+    """Validate resuming a stopped KServe raw deployment model by setting the stop annotation to false.
+
+    Steps:
+        1. Deploy an OVMS ONNX model as a raw deployment with stop set to true.
+        2. Verify no predictor pods are created while the stop annotation is true.
+        3. Patch the ISVC stop annotation to false.
+        4. Verify predictor pods are rolled out and the model can be queried via REST.
+    """
+
     def test_stop_and_true_no_pod_rollout(
         self,
         unprivileged_client,
         unprivileged_model_namespace,
         ovms_kserve_serving_runtime,
         ovms_raw_inference_service,
     ):
-        """Verify no pod rollout when the stop annotation is true"""
-        """Verify pods do not exist"""
+        """Verify no pod rollout when the stop annotation is true."""
         result = consistently_verify_no_pods_exist(
             client=unprivileged_client,
             isvc=ovms_raw_inference_service,
@@ -114,8 +130,7 @@ def test_stop_and_update_to_false_pod_rollout(
         ovms_raw_inference_service,
         patched_raw_inference_service_stop_annotation,
     ):
-        """Verify pod rollout when the stop annotation is updated to false"""
-        """Verify that kserve Raw ONNX model can be queried using REST"""
+        """Verify pod rollout and REST inference after the stop annotation is set to false."""
         verify_inference_response(
             inference_service=patched_raw_inference_service_stop_annotation,
             inference_config=ONNX_INFERENCE_CONFIG,

@@ -39,11 +39,11 @@ def test_raw_onnx_rout_reconciliation(self, ovms_raw_inference_service):
         )
 
     def test_route_value_before_and_after_deletion(self, unprivileged_client, ovms_raw_inference_service):
-        # Validate ingress status before and after route deletion
+        """Verify that the ingress status changes after the route is deleted and recreated."""
         assert_ingress_status_changed(client=unprivileged_client, inference_service=ovms_raw_inference_service)
 
     def test_model_works_after_route_is_recreated(self, ovms_raw_inference_service):
-        # Final inference validation after route update
+        """Verify that the model is still queryable via REST after the route is recreated."""
         verify_inference_response(
             inference_service=ovms_raw_inference_service,
             inference_config=ONNX_INFERENCE_CONFIG,

@@ -44,6 +44,16 @@
     indirect=True,
 )
 class TestRestRawDeploymentRoutes:
+    """Validate REST route visibility transitions for KServe raw deployment.
+
+    Steps:
+        1. Deploy a Caikit-TGIS model as a raw deployment with HTTP enabled.
+        2. Verify the default route visibility label is not set.
+        3. Query the model via the internal route and confirm a successful response.
+        4. Patch the ISVC to expose the route externally and verify inference over HTTPS.
+        5. Revert the route to local-cluster visibility and verify external access is disabled.
+    """
+
     def test_default_visibility_value(self, s3_models_inference_service):
         """Test default route visibility value"""
         if labels := s3_models_inference_service.labels:
@@ -126,6 +136,15 @@ def test_disabled_rest_raw_deployment_exposed_route(self, patched_s3_caikit_kser
     indirect=True,
 )
 class TestRestRawDeploymentRoutesTimeout:
+    """Validate REST route timeout behavior for KServe raw deployment.
+
+    Steps:
+        1. Deploy a Caikit-TGIS model as a raw deployment with an external route.
+        2. Verify inference succeeds over the exposed HTTPS route.
+        3. Patch the route with an extremely low timeout annotation.
+        4. Verify inference fails with a 504 Gateway Time-out error.
+    """
+
     @pytest.mark.dependency(name="test_rest_raw_deployment_exposed_route")
     def test_rest_raw_deployment_exposed_route(self, s3_models_inference_service):
         """Test HTTP inference using exposed (external) route"""
@@ -191,6 +210,14 @@ def test_rest_raw_deployment_exposed_route_with_timeout(self, s3_models_inferenc
 )
 @pytest.mark.skip(reason="skipping grpc raw for tgis-caikit")
 class TestGrpcRawDeployment:
+    """Validate gRPC route visibility transitions for KServe raw deployment.
+
+    Steps:
+        1. Deploy a Caikit-TGIS model as a raw deployment with gRPC enabled.
+        2. Query the model via the internal gRPC route and confirm a successful streaming response.
+        3. Patch the ISVC to expose the route externally and verify gRPC inference over the exposed route.
+    """
+
     def test_grpc_raw_deployment_internal_route(self, s3_models_inference_service):
         """Test GRPC inference using internal route"""
         verify_inference_response(
@@ -247,6 +274,15 @@ def test_grpc_raw_deployment_exposed_route(self, patched_s3_caikit_kserve_isvc_v
 )
 @pytest.mark.skip(reason="skipping grpc raw for tgis-caikit")
 class TestGrpcRawDeploymentTimeout:
+    """Validate gRPC route timeout behavior for KServe raw deployment.
+
+    Steps:
+        1. Deploy a Caikit-TGIS model as a raw deployment with an external gRPC route.
+        2. Verify gRPC inference succeeds over the exposed route.
+        3. Patch the route with an extremely low timeout annotation.
+        4. Verify gRPC inference fails with a 504 Gateway Time-out error.
+    """
+
     @pytest.mark.dependency(name="test_grpc_raw_deployment_exposed_route")
     def test_grpc_raw_deployment_exposed_route(self, s3_models_inference_service):
         """Test GRPC inference using exposed (external) route"""

@@ -47,6 +47,19 @@
     indirect=True,
 )
 class TestMultiNode:
+    """Validate multi-node GPU inference with Ray-based vLLM serving on KServe.
+
+    Steps:
+        1. Deploy a Granite-8B model on a multi-node vLLM inference service backed by PVC storage.
+        2. Verify Ray cluster health and NVIDIA GPU status across head and worker pods.
+        3. Validate default runtime worker spec (tensorParallelSize=1, pipelineParallelSize=2).
+        4. Confirm pods are distributed across GPU nodes and TLS certificates are provisioned.
+        5. Test pod resilience by deleting head and worker pods and verifying inference recovery.
+        6. Validate TLS secret reconciliation, runtime deletion cleanup, and runtime re-creation.
+        7. Test external route inference and dynamic updates to tensor/pipeline parallel sizes.
+        8. Verify model args propagation to and removal from the vLLM command spec.
+    """
+
     def test_multi_node_ray_status(self, multi_node_predictor_pods_scope_class):
         """Test multi node ray status"""
         verify_ray_status(pods=multi_node_predictor_pods_scope_class)

@@ -27,6 +27,14 @@
     indirect=True,
 )
 class TestOciMultiNode:
+    """Validate multi-node GPU inference using OCI-based model storage on KServe.
+
+    Steps:
+        1. Deploy a multi-node vLLM inference service using an OCI model image.
+        2. Send an inference request over the external HTTPS route.
+        3. Verify the model returns a successful completion response.
+    """
+
     def test_oci_multi_node_basic_external_inference(self, multi_node_oci_inference_service):
         """Test multi node basic inference"""
         verify_inference_response(

@@ -42,6 +42,15 @@
     indirect=True,
 )
 class TestModelMetrics:
+    """Validate OVMS model metrics are reported through OpenShift UserWorkloadMonitoring.
+
+    Steps:
+        1. Deploy an OVMS model car inference service with metrics enabled.
+        2. Send a single inference request and verify the success counter increments to 1.
+        3. Send multiple inference requests in parallel and verify the total success count.
+        4. Query Prometheus for CPU utilization metrics of the model namespace pods.
+    """
+
     @pytest.mark.tier1
     def test_model_metrics_num_success_requests(self, model_car_inference_service, prometheus):
         """Verify number of successful model requests in OpenShift monitoring system (UserWorkloadMonitoring) metrics"""

@@ -37,6 +37,14 @@
 @pytest.mark.tier1
 @pytest.mark.rawdeployment
 class TestRawUnprivilegedUserMetrics:
+    """Validate that an unprivileged user can access model metrics via UserWorkloadMonitoring.
+
+    Steps:
+        1. Deploy an OVMS model car inference service as a non-admin user with metrics enabled.
+        2. Send multiple inference requests to the model.
+        3. Query Prometheus and verify the success request count matches expectations.
+    """
+
     @pytest.mark.metrics
     def test_non_admin_raw_metrics(
         self,

@@ -42,6 +42,15 @@
     indirect=True,
 )
 class TestKServeDSCRawDefaultDeploymentMode:
+    """Validate that the DSC default deployment mode propagates to inference services.
+
+    Steps:
+        1. Set the DSC default deployment mode to RawDeployment.
+        2. Deploy an OVMS inference service without explicitly specifying a deployment mode.
+        3. Verify the ISVC annotation reflects RawDeployment as the deployment mode.
+        4. Send an inference request and verify a successful response over HTTPS.
+    """
+
     def test_isvc_contains_raw_deployment_mode(self, default_deployment_mode_in_dsc, ovms_inference_service):
         """Verify that default deployment mode is set to raw in inference service."""
         assert (

@@ -51,6 +51,15 @@ def wait_for_isvc_model_status(isvc: InferenceService, target_model_state: str,
     indirect=True,
 )
 class TestInferenceServiceCustomResources:
+    """Validate InferenceService status transitions when the model storage path is invalid and then corrected.
+
+    Steps:
+        1. Deploy an ISVC with a non-existing S3 model path.
+        2. Verify the model status transitions to FailedToLoad / BlockedByFailedLoad.
+        3. Update the ISVC with a valid S3 model path.
+        4. Verify the model status transitions to Loaded / UpToDate.
+    """
+
     @pytest.mark.dependency(name="test_isvc_with_invalid_models_s3_path")
     def test_isvc_with_invalid_models_s3_path(self, invalid_s3_models_inference_service):
         """Test ISVC status with invalid models storage path"""