Skip to content

Commit 3de40be

Browse files
ybrodsky-rh and claude committed
Add KServe inference test suite for Neuron
Add a new Ginkgo test suite under tests/hw-accel/neuron/kserve/ that validates KServe InferenceService deployment and inference on AWS Neuron hardware with OpenShift AI. Test cases: - kserve-001: Deploy InferenceService and verify Ready state - kserve-002: Send inference request and validate response Depends on eco-goinfra KServe builders (rh-ecosystem-edge/eco-goinfra#1337). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent df020c0 commit 3de40be

5 files changed

Lines changed: 386 additions & 1 deletion

File tree

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
package do
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"encoding/json"
7+
"fmt"
8+
"time"
9+
10+
"github.com/rh-ecosystem-edge/eco-goinfra/pkg/clients"
11+
"github.com/rh-ecosystem-edge/eco-gotests/tests/hw-accel/neuron/params"
12+
corev1 "k8s.io/api/core/v1"
13+
apierrors "k8s.io/apimachinery/pkg/api/errors"
14+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15+
"k8s.io/apimachinery/pkg/util/wait"
16+
"k8s.io/klog/v2"
17+
)
18+
19+
// KServeInferenceConfig holds configuration for KServe inference requests.
type KServeInferenceConfig struct {
	// InferenceServiceURL is the base URL of the InferenceService;
	// "/v1/chat/completions" is appended when the request is sent.
	InferenceServiceURL string
	// Namespace is the namespace in which the temporary curl pod is created.
	Namespace string
	// ModelName is the model identifier embedded in the inference request body.
	ModelName string
	// Timeout bounds the overall inference attempt, including retries while
	// the model may still be compiling.
	Timeout time.Duration
}
26+
27+
// ExecuteKServeInference sends an inference request to a KServe InferenceService endpoint.
28+
// It creates a temporary curl pod, sends the request, and retries until success or timeout.
29+
func ExecuteKServeInference(apiClient *clients.Settings, config KServeInferenceConfig) (string, error) {
30+
ctx, cancel := context.WithTimeout(context.Background(), config.Timeout)
31+
defer cancel()
32+
33+
jsonBody, err := buildInferenceRequestBody(config.ModelName)
34+
if err != nil {
35+
return "", fmt.Errorf("failed to marshal inference request: %w", err)
36+
}
37+
38+
podName := "kserve-inference-test-curl"
39+
40+
if err := ensureCurlPod(ctx, apiClient, podName, config.Namespace); err != nil {
41+
return "", fmt.Errorf("failed to create curl pod: %w", err)
42+
}
43+
44+
defer cleanupCurlPod(apiClient, podName, config.Namespace)
45+
46+
endpoint := fmt.Sprintf("%s/v1/chat/completions", config.InferenceServiceURL)
47+
48+
curlCmd := []string{
49+
"curl", "-sk",
50+
"-X", "POST",
51+
endpoint,
52+
"-H", "Content-Type: application/json",
53+
"-d", string(jsonBody),
54+
"--max-time", "60",
55+
}
56+
57+
const retryInterval = 30 * time.Second
58+
59+
var inferenceResult string
60+
61+
pollErr := wait.PollUntilContextTimeout(
62+
ctx, retryInterval, config.Timeout, true,
63+
func(pollCtx context.Context) (bool, error) {
64+
execCtx, execCancel := context.WithTimeout(pollCtx, 90*time.Second)
65+
defer execCancel()
66+
67+
response, execErr := executeInPod(execCtx, apiClient, podName, config.Namespace, "curl", curlCmd)
68+
if execErr != nil {
69+
klog.V(params.NeuronLogLevel).Infof(
70+
"KServe inference attempt failed (model may still be compiling): %v", execErr)
71+
72+
return false, nil
73+
}
74+
75+
content, extractErr := extractInferenceContent(response)
76+
if extractErr != nil {
77+
klog.V(params.NeuronLogLevel).Infof(
78+
"KServe inference response not ready: %v", extractErr)
79+
80+
return false, nil
81+
}
82+
83+
inferenceResult = content
84+
85+
return true, nil
86+
})
87+
if pollErr != nil {
88+
return "", fmt.Errorf("KServe inference failed after %v: %w", config.Timeout, pollErr)
89+
}
90+
91+
return inferenceResult, nil
92+
}
93+
94+
// ensureCurlPod creates a long-running curl pod for executing inference requests.
95+
func ensureCurlPod(ctx context.Context, apiClient *clients.Settings, name, namespace string) error {
96+
_, err := apiClient.Pods(namespace).Get(ctx, name, metav1.GetOptions{})
97+
if err == nil {
98+
return nil
99+
}
100+
101+
pod := &corev1.Pod{
102+
ObjectMeta: metav1.ObjectMeta{
103+
Name: name,
104+
Namespace: namespace,
105+
Annotations: map[string]string{
106+
"sidecar.istio.io/inject": "false",
107+
},
108+
},
109+
Spec: corev1.PodSpec{
110+
Containers: []corev1.Container{
111+
{
112+
Name: "curl",
113+
Image: "registry.access.redhat.com/ubi9/ubi-minimal:latest",
114+
Command: []string{"sleep", "3600"},
115+
},
116+
},
117+
RestartPolicy: corev1.RestartPolicyNever,
118+
},
119+
}
120+
121+
_, err = apiClient.Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
122+
if err != nil && !apierrors.IsAlreadyExists(err) {
123+
return err
124+
}
125+
126+
return wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true,
127+
func(pollCtx context.Context) (bool, error) {
128+
p, getErr := apiClient.Pods(namespace).Get(pollCtx, name, metav1.GetOptions{})
129+
if getErr != nil {
130+
return false, nil
131+
}
132+
133+
return p.Status.Phase == corev1.PodRunning, nil
134+
})
135+
}
136+
137+
// cleanupCurlPod removes the temporary curl pod.
138+
func cleanupCurlPod(apiClient *clients.Settings, name, namespace string) {
139+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
140+
defer cancel()
141+
142+
err := apiClient.Pods(namespace).Delete(ctx, name, metav1.DeleteOptions{})
143+
if err != nil && !apierrors.IsNotFound(err) {
144+
klog.V(params.NeuronLogLevel).Infof("Failed to delete curl pod: %v", err)
145+
}
146+
}
147+
148+
// ParseInferenceResponse parses a raw chat completions JSON response.
// It rejects responses carrying a top-level "error" field and otherwise
// returns the decoded body re-encoded as indented JSON (with a trailing
// newline from the encoder).
func ParseInferenceResponse(response string) (string, error) {
	var parsed map[string]interface{}
	if err := json.Unmarshal([]byte(response), &parsed); err != nil {
		return "", fmt.Errorf("failed to decode response: %w, raw: %s", err, response)
	}

	if apiErr, found := parsed["error"]; found {
		raw, _ := json.Marshal(apiErr)

		return "", fmt.Errorf("inference returned error: %s", string(raw))
	}

	var pretty bytes.Buffer

	indented := json.NewEncoder(&pretty)
	indented.SetIndent("", " ")

	if err := indented.Encode(parsed); err != nil {
		// Fall back to Go's default formatting rather than failing the test flow.
		return fmt.Sprintf("%v", parsed), nil
	}

	return pretty.String(), nil
}

tests/hw-accel/neuron/internal/neuronconfig/config.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@ type NeuronConfig struct {
4343
InstanceType string
4444
// StorageClassName is the storage class for model PVC (default: gp3-csi).
4545
StorageClassName string
46+
// KServeModelName is the HuggingFace model for KServe inference tests.
47+
KServeModelName string
48+
// KServeVLLMImage is the vLLM Neuron image for the KServe ServingRuntime.
49+
KServeVLLMImage string
50+
// KServeNamespace is the namespace where KServe resources are deployed.
51+
KServeNamespace string
52+
// KServeTensorParallelSize is the tensor parallel size for KServe vLLM.
53+
KServeTensorParallelSize string
4654
}
4755

4856
// NewNeuronConfig creates a new NeuronConfig from environment variables.
@@ -65,6 +73,10 @@ func NewNeuronConfig() *NeuronConfig {
6573
ImageRepoSecretName: os.Getenv("ECO_HWACCEL_NEURON_IMAGE_REPO_SECRET"),
6674
InstanceType: os.Getenv("ECO_HWACCEL_NEURON_INSTANCE_TYPE"),
6775
StorageClassName: os.Getenv("ECO_HWACCEL_NEURON_STORAGE_CLASS"),
76+
KServeModelName: os.Getenv("ECO_HWACCEL_NEURON_KSERVE_MODEL_NAME"),
77+
KServeVLLMImage: os.Getenv("ECO_HWACCEL_NEURON_KSERVE_VLLM_IMAGE"),
78+
KServeNamespace: os.Getenv("ECO_HWACCEL_NEURON_KSERVE_NAMESPACE"),
79+
KServeTensorParallelSize: os.Getenv("ECO_HWACCEL_NEURON_KSERVE_TENSOR_PARALLEL_SIZE"),
6880
}
6981

7082
// Set defaults
@@ -86,10 +98,21 @@ func NewNeuronConfig() *NeuronConfig {
8698
}
8799

88100
if config.StorageClassName == "" {
89-
// Default storage class for ROSA/AWS
90101
config.StorageClassName = "gp3-csi"
91102
}
92103

104+
if config.KServeModelName == "" {
105+
config.KServeModelName = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
106+
}
107+
108+
if config.KServeNamespace == "" {
109+
config.KServeNamespace = "neuron-inference"
110+
}
111+
112+
if config.KServeTensorParallelSize == "" {
113+
config.KServeTensorParallelSize = "1"
114+
}
115+
93116
klog.V(params.NeuronLogLevel).Infof("NeuronConfig loaded: DriversImage=%s, DevicePluginImage=%s, NodeMetricsImage=%s",
94117
config.DriversImage, config.DevicePluginImage, config.NodeMetricsImage)
95118

@@ -110,3 +133,8 @@ func (c *NeuronConfig) IsVLLMConfigured() bool {
110133
func (c *NeuronConfig) IsUpgradeConfigured() bool {
111134
return c.UpgradeTargetVersion != "" && c.UpgradeTargetDriversImage != ""
112135
}
136+
137+
// IsKServeConfigured checks if KServe testing configuration is present.
138+
func (c *NeuronConfig) IsKServeConfigured() bool {
139+
return c.HuggingFaceToken != "" && c.KServeNamespace != ""
140+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package tsparams
2+
3+
import "time"
4+
5+
const (
	// LabelSuite is the label identifying the KServe test suite.
	LabelSuite = "kserve"

	// KServeTestNamespace is the namespace used for KServe test resources.
	KServeTestNamespace = "neuron-inference"

	// ServingRuntimeName is the name of the vLLM Neuron ServingRuntime.
	ServingRuntimeName = "vllm-neuron-runtime"

	// ModelFormatName is the model format declared by the ServingRuntime.
	ModelFormatName = "vllm-neuron"

	// ServiceAccountName is the service account used by KServe test resources.
	ServiceAccountName = "kserve-neuron-sa"

	// InferenceServiceReadyTimeout bounds the wait for the InferenceService
	// to reach Ready; generous because model compilation on Neuron is slow.
	InferenceServiceReadyTimeout = 30 * time.Minute

	// InferenceRequestTimeout bounds a single inference request/retry cycle.
	InferenceRequestTimeout = 5 * time.Minute

	// CurlPodName is the name of the temporary curl pod used to send requests.
	CurlPodName = "kserve-inference-test-curl"
)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package kserve
2+
3+
import (
4+
"runtime"
5+
"testing"
6+
7+
. "github.com/onsi/ginkgo/v2"
8+
. "github.com/onsi/gomega"
9+
"github.com/rh-ecosystem-edge/eco-goinfra/pkg/reportxml"
10+
_ "github.com/rh-ecosystem-edge/eco-gotests/tests/hw-accel/neuron/kserve/tests"
11+
. "github.com/rh-ecosystem-edge/eco-gotests/tests/internal/inittools"
12+
)
13+
14+
// currentFile is the path of this source file, used to derive the JUnit report path.
var _, currentFile, _, _ = runtime.Caller(0)
16+
func TestKServe(t *testing.T) {
17+
_, reporterConfig := GinkgoConfiguration()
18+
reporterConfig.JUnitReport = GeneralConfig.GetJunitReportPath(currentFile)
19+
20+
RegisterFailHandler(Fail)
21+
RunSpecs(t, "Neuron KServe Suite", reporterConfig)
22+
}
23+
24+
// Suite-level setup hook; currently only logs the setup step.
var _ = BeforeSuite(func() {
	By("Setting up Neuron KServe test suite")
})

// Suite-level teardown hook; currently only logs the teardown step.
var _ = AfterSuite(func() {
	By("Tearing down Neuron KServe test suite")
})

// Emit the report XML once the whole suite has finished.
var _ = ReportAfterSuite("", func(report Report) {
	reportxml.Create(report, GeneralConfig.GetReportPath(), GeneralConfig.TCPrefix)
})

0 commit comments

Comments
 (0)