Skip to content

Commit 6fac083

Browse files
authored
Resolve inference endpoint using runtime protocol when applicable (kserve#4527)
Signed-off-by: Edgar Hernández <23639005+israel-hdez@users.noreply.github.com>
1 parent 6530d1e commit 6fac083

File tree

3 files changed

+135
-4
lines changed

3 files changed

+135
-4
lines changed

pkg/controller/v1alpha1/inferencegraph/controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ func (r *InferenceGraphReconciler) Reconcile(ctx context.Context, req ctrl.Reque
160160
err := r.Client.Get(ctx, types.NamespacedName{Namespace: graph.Namespace, Name: route.ServiceName}, &isvc)
161161
if err == nil {
162162
if graph.Spec.Nodes[node].Steps[i].ServiceURL == "" {
163-
serviceUrl, err := isvcutils.GetPredictorEndpoint(&isvc)
163+
serviceUrl, err := isvcutils.GetPredictorEndpoint(ctx, r.Client, &isvc)
164164
if err == nil {
165165
graph.Spec.Nodes[node].Steps[i].ServiceURL = serviceUrl
166166
} else {

pkg/controller/v1beta1/inferenceservice/utils/utils.go

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ func GetModelName(isvc *v1beta1.InferenceService) string {
136136
}
137137

138138
// GetPredictorEndpoint returns the predictor endpoint if status.address.url is not nil; otherwise, it returns an empty string with an error.
139-
func GetPredictorEndpoint(isvc *v1beta1.InferenceService) (string, error) {
139+
func GetPredictorEndpoint(ctx context.Context, client client.Client, isvc *v1beta1.InferenceService) (string, error) {
140140
if isvc.Status.Address != nil && isvc.Status.Address.URL != nil {
141141
hostName := isvc.Status.Address.URL.String()
142142
path := ""
@@ -149,7 +149,47 @@ func GetPredictorEndpoint(isvc *v1beta1.InferenceService) (string, error) {
149149
path = constants.PredictPath(modelName, constants.ProtocolV2)
150150
}
151151
} else if !IsMMSPredictor(&isvc.Spec.Predictor) {
152-
protocol := isvc.Spec.Predictor.GetImplementation().GetProtocol()
152+
predictorImplementation := isvc.Spec.Predictor.GetImplementation()
153+
protocol := predictorImplementation.GetProtocol()
154+
155+
if modelSpec, ok := predictorImplementation.(*v1beta1.ModelSpec); ok {
156+
if modelSpec.Runtime != nil {
157+
// When a Runtime is specified, and there is no protocol specified
158+
// in the ISVC, the protocol cannot be assumed to be V1. The protocol
159+
// needs to be extracted from the Runtime.
160+
161+
runtime, err := GetServingRuntime(ctx, client, *modelSpec.Runtime, isvc.Namespace)
162+
if err != nil {
163+
return "", err
164+
}
165+
166+
// If the runtime has protocol versions, use the first one supported by IG.
167+
// Otherwise, assume Protocol V1.
168+
if len(runtime.ProtocolVersions) != 0 {
169+
found := false
170+
for _, pversion := range runtime.ProtocolVersions {
171+
if pversion == constants.ProtocolV1 || pversion == constants.ProtocolV2 {
172+
protocol = pversion
173+
found = true
174+
break
175+
}
176+
}
177+
178+
if !found {
179+
return "", errors.New("the runtime does not support a protocol compatible with Inference Graphs")
180+
}
181+
}
182+
}
183+
184+
// else {
185+
// Notice that when using auto-selection (i.e. Runtime is nil), the
186+
// ISVC is assumed to be protocol v1. Thus, for auto-select, a runtime
187+
// will only match if it lists protocol v1 as supported. In this case,
188+
// the code above (protocol := predictorImplementation.GetProtocol()) would
189+
// already get the right protocol to configure in the InferenceGraph.
190+
// }
191+
}
192+
153193
if protocol == constants.ProtocolV1 {
154194
path = constants.PredictPath(modelName, constants.ProtocolV1)
155195
} else if protocol == constants.ProtocolV2 {

pkg/controller/v1beta1/inferenceservice/utils/utils_test.go

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"testing"
2323

2424
"github.com/onsi/gomega/types"
25+
"k8s.io/utils/ptr"
2526
"knative.dev/pkg/apis"
2627
knativeV1 "knative.dev/pkg/apis/duck/v1"
2728

@@ -1561,6 +1562,34 @@ func TestGetPredictorEndpoint(t *testing.T) {
15611562
"cpu": resource.MustParse("90m"),
15621563
},
15631564
}
1565+
namespace := "default"
1566+
1567+
s := runtime.NewScheme()
1568+
err := v1alpha1.AddToScheme(s)
1569+
if err != nil {
1570+
t.Errorf("Failed to add v1alpha1 to scheme %s", err)
1571+
}
1572+
protocolV1Runtime := &v1alpha1.ServingRuntime{
1573+
TypeMeta: metav1.TypeMeta{},
1574+
ObjectMeta: metav1.ObjectMeta{
1575+
Name: "mocked-v1-runtime",
1576+
Namespace: namespace,
1577+
},
1578+
Spec: v1alpha1.ServingRuntimeSpec{
1579+
ProtocolVersions: []constants.InferenceServiceProtocol{"v1"},
1580+
},
1581+
}
1582+
protocolV2Runtime := &v1alpha1.ServingRuntime{
1583+
TypeMeta: metav1.TypeMeta{},
1584+
ObjectMeta: metav1.ObjectMeta{
1585+
Name: "mocked-v2-runtime",
1586+
Namespace: namespace,
1587+
},
1588+
Spec: v1alpha1.ServingRuntimeSpec{
1589+
ProtocolVersions: []constants.InferenceServiceProtocol{"v2"},
1590+
},
1591+
}
1592+
mockClient := fake.NewClientBuilder().WithScheme(s).WithObjects(protocolV1Runtime, protocolV2Runtime).Build()
15641593

15651594
scenarios := map[string]struct {
15661595
isvc InferenceService
@@ -1829,11 +1858,73 @@ func TestGetPredictorEndpoint(t *testing.T) {
18291858
expectedUrl: "",
18301859
expectedErr: gomega.MatchError("service sklearn is not ready"),
18311860
},
1861+
"NoProtocolWithRuntimeProtocolV1": {
1862+
isvc: InferenceService{
1863+
ObjectMeta: metav1.ObjectMeta{
1864+
Name: "sklearn",
1865+
Namespace: namespace,
1866+
},
1867+
Spec: InferenceServiceSpec{
1868+
Predictor: PredictorSpec{
1869+
Model: &ModelSpec{
1870+
Runtime: ptr.To("mocked-v1-runtime"),
1871+
ModelFormat: ModelFormat{
1872+
Name: "sklearn",
1873+
},
1874+
PredictorExtensionSpec: PredictorExtensionSpec{
1875+
StorageURI: proto.String("s3://test"),
1876+
},
1877+
},
1878+
},
1879+
},
1880+
Status: InferenceServiceStatus{
1881+
Address: &knativeV1.Addressable{
1882+
URL: &apis.URL{
1883+
Scheme: "http",
1884+
Host: "sklearn-predictor.default.svc.cluster.local",
1885+
},
1886+
},
1887+
},
1888+
},
1889+
expectedUrl: "http://sklearn-predictor.default.svc.cluster.local/v1/models/sklearn:predict",
1890+
expectedErr: gomega.BeNil(),
1891+
},
1892+
"NoProtocolWithRuntimeProtocolV2": {
1893+
isvc: InferenceService{
1894+
ObjectMeta: metav1.ObjectMeta{
1895+
Name: "sklearn",
1896+
Namespace: namespace,
1897+
},
1898+
Spec: InferenceServiceSpec{
1899+
Predictor: PredictorSpec{
1900+
Model: &ModelSpec{
1901+
Runtime: ptr.To("mocked-v2-runtime"),
1902+
ModelFormat: ModelFormat{
1903+
Name: "sklearn",
1904+
},
1905+
PredictorExtensionSpec: PredictorExtensionSpec{
1906+
StorageURI: proto.String("s3://test"),
1907+
},
1908+
},
1909+
},
1910+
},
1911+
Status: InferenceServiceStatus{
1912+
Address: &knativeV1.Addressable{
1913+
URL: &apis.URL{
1914+
Scheme: "http",
1915+
Host: "sklearn-predictor.default.svc.cluster.local",
1916+
},
1917+
},
1918+
},
1919+
},
1920+
expectedUrl: "http://sklearn-predictor.default.svc.cluster.local/v2/models/sklearn/infer",
1921+
expectedErr: gomega.BeNil(),
1922+
},
18321923
}
18331924

18341925
for name, scenario := range scenarios {
18351926
t.Run(name, func(t *testing.T) {
1836-
res, err := GetPredictorEndpoint(&scenario.isvc)
1927+
res, err := GetPredictorEndpoint(t.Context(), mockClient, &scenario.isvc)
18371928
g.Expect(err).To(scenario.expectedErr)
18381929
if !g.Expect(res).To(gomega.Equal(scenario.expectedUrl)) {
18391930
t.Errorf("got %s, want %s", res, scenario.expectedUrl)

0 commit comments

Comments
 (0)