Skip to content

Commit b630bd8

Browse files
committed
fix model status for NIMs incompatible with openapi schema
Signed-off-by: Varun Ramachandra Sekar <vsekar@nvidia.com>
1 parent 4071afe commit b630bd8

5 files changed

Lines changed: 283 additions & 49 deletions

File tree

internal/controller/platform/standalone/nimservice.go

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package standalone
1919
import (
2020
"context"
2121
"fmt"
22+
"strings"
2223
"time"
2324

2425
"github.com/go-logr/logr"
@@ -318,26 +319,70 @@ func (r *NIMServiceReconciler) getNIMModelName(ctx context.Context, nimServiceEn
318319
// List nimservice /v1/models endpoint.
319320
modelsList, err := nimmodels.ListModelsV1(ctx, nimServiceEndpoint)
320321
if err != nil {
322+
logger.Error(err, "Failed to list models", "endpoint", nimServiceEndpoint)
323+
// Check if it's an HTTP error with a status code
324+
if nimmodels.IsNotFound(err) {
325+
// The endpoint does not exist
326+
logger.V(2).Error(err, "URI does not exist", "uri", nimmodels.ModelsV1URI, "endpoint", nimServiceEndpoint)
327+
metadata, err := nimmodels.GetMetadataV1(ctx, nimServiceEndpoint)
328+
if err != nil {
329+
logger.Error(err, "Failed to get metadata", "endpoint", nimServiceEndpoint)
330+
if nimmodels.IsNotFound(err) {
331+
logger.V(2).Error(err, "URI does not exist", "uri", nimmodels.MetadataV1URI, "endpoint", nimServiceEndpoint)
332+
return "", nil
333+
}
334+
return "", err
335+
}
336+
modelName, err := getModelNameFromMetadata(metadata)
337+
if err != nil {
338+
logger.V(2).Error(err, "Failed to get model name from metadata", "endpoint", nimServiceEndpoint)
339+
return "", nil
340+
}
341+
342+
return modelName, nil
343+
}
344+
321345
return "", err
322346
}
323347

324-
if modelsList.Object == nimmodels.ObjectTypeList {
325-
if len(modelsList.Data) == 1 {
326-
return modelsList.Data[0].Id, nil
327-
}
348+
modelName, err := getModelNameFromModelsList(modelsList)
349+
if err != nil {
350+
logger.V(2).Error(err, "Failed to get model name from models list", "endpoint", nimServiceEndpoint)
351+
return "", nil
352+
}
353+
return modelName, nil
354+
}
328355

329-
for _, model := range modelsList.Data {
330-
if model.Object != nimmodels.ObjectTypeModel {
331-
continue
332-
}
333-
if model.Root != nil && *model.Root == model.Id {
334-
return model.Id, nil
335-
}
356+
func getModelNameFromModelsList(modelsList *nimmodels.ModelsV1List) (string, error) {
357+
if modelsList.Object != nimmodels.ObjectTypeList {
358+
return "", fmt.Errorf("unexpected object type: %s", modelsList.Object)
359+
}
360+
361+
if len(modelsList.Data) == 0 {
362+
return "", fmt.Errorf("no models found")
363+
}
364+
365+
if len(modelsList.Data) == 1 {
366+
return modelsList.Data[0].Id, nil
367+
}
368+
369+
for _, model := range modelsList.Data {
370+
if model.Object != nimmodels.ObjectTypeModel {
371+
continue
372+
}
373+
if model.Root != nil && *model.Root == model.Id {
374+
return model.Id, nil
336375
}
337376
}
338377

339-
logger.Info("WARN: Failed to detect model name from /v1/models API response", "endpoint", nimServiceEndpoint)
340-
return "", nil
378+
return "", fmt.Errorf("no valid model found")
379+
}
380+
381+
func getModelNameFromMetadata(metadata *nimmodels.MetadataV1) (string, error) {
382+
if len(metadata.ModelInfo) == 0 {
383+
return "", fmt.Errorf("no model info found")
384+
}
385+
return strings.Split(metadata.ModelInfo[0].ShortName, ":")[0], nil
341386
}
342387

343388
func (r *NIMServiceReconciler) getNIMModelEndpoints(ctx context.Context, nimService *appsv1alpha1.NIMService) (string, string, error) {

internal/controller/platform/standalone/nimservice_test.go

Lines changed: 71 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -394,9 +394,13 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
394394

395395
// Start mock test server to serve nimservice endpoint.
396396
testServerHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
397-
w.WriteHeader(http.StatusOK)
398-
_, err := w.Write([]byte(`{"object": "list", "data":[{"id": "dummy-model", "object": "model", "root": "dummy-model", "parent": null}]}`))
399-
Expect(err).ToNot(HaveOccurred())
397+
if r.URL.Path == "/v1/models" {
398+
w.WriteHeader(http.StatusOK)
399+
_, err := w.Write([]byte(`{"object": "list", "data":[{"id": "dummy-model", "object": "model", "root": "dummy-model", "parent": null}]}`))
400+
Expect(err).ToNot(HaveOccurred())
401+
} else {
402+
w.WriteHeader(http.StatusNotFound)
403+
}
400404
})
401405
testServer = httptest.NewServer(testServerHandler)
402406
http.DefaultTransport = &mockTransport{
@@ -772,9 +776,13 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
772776

773777
It("should fail when models response is unmarshallable", func() {
774778
testServer.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
775-
w.WriteHeader(http.StatusOK)
776-
_, err := w.Write([]byte(`{"data": "invalid response"}`))
777-
Expect(err).ToNot(HaveOccurred())
779+
if r.URL.Path == "/v1/models" {
780+
w.WriteHeader(http.StatusOK)
781+
_, err := w.Write([]byte(`{"data": "invalid response"}`))
782+
Expect(err).ToNot(HaveOccurred())
783+
} else {
784+
w.WriteHeader(http.StatusNotFound)
785+
}
778786
})
779787
svc := &corev1.Service{
780788
ObjectMeta: metav1.ObjectMeta{
@@ -800,10 +808,14 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
800808

801809
It("should have empty model name when it cannot be inferred", func() {
802810
testServer.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
803-
w.WriteHeader(http.StatusOK)
804-
// Set dummy object type for model.
805-
_, err := w.Write([]byte(`{"object": "list", "data":[{"id": "dummy-model", "object": "dummy", "root": "dummy-model", "parent": null}]}`))
806-
Expect(err).ToNot(HaveOccurred())
811+
if r.URL.Path == "/v1/models" {
812+
w.WriteHeader(http.StatusOK)
813+
// Set dummy object type for model.
814+
_, err := w.Write([]byte(`{"object": "list", "data":[{"id": "dummy-model", "object": "dummy", "root": "dummy-model", "parent": null}]}`))
815+
Expect(err).ToNot(HaveOccurred())
816+
} else {
817+
w.WriteHeader(http.StatusNotFound)
818+
}
807819
})
808820
svc := &corev1.Service{
809821
ObjectMeta: metav1.ObjectMeta{
@@ -857,10 +869,14 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
857869

858870
It("should succeed when nimservice has lora adapter models attached", func() {
859871
testServer.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
860-
w.WriteHeader(http.StatusOK)
861-
// Set dummy object type for model.
862-
_, err := w.Write([]byte(`{"object": "list", "data":[{"id": "dummy-model-adapter1", "object": "model", "root": "dummy-model", "parent": null}, {"id": "dummy-model-adapter2", "object": "model", "root": "dummy-model", "parent": null}, {"id": "dummy-model", "object": "model", "root": "dummy-model", "parent": null}]}`))
863-
Expect(err).ToNot(HaveOccurred())
872+
if r.URL.Path == "/v1/models" {
873+
w.WriteHeader(http.StatusOK)
874+
// Set dummy object type for model.
875+
_, err := w.Write([]byte(`{"object": "list", "data":[{"id": "dummy-model-adapter1", "object": "model", "root": "dummy-model", "parent": null}, {"id": "dummy-model-adapter2", "object": "model", "root": "dummy-model", "parent": null}, {"id": "dummy-model", "object": "model", "root": "dummy-model", "parent": null}]}`))
876+
Expect(err).ToNot(HaveOccurred())
877+
} else {
878+
w.WriteHeader(http.StatusNotFound)
879+
}
864880
})
865881
svc := &corev1.Service{
866882
ObjectMeta: metav1.ObjectMeta{
@@ -888,6 +904,47 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
888904
Expect(modelStatus.Name).To(Equal("dummy-model"))
889905
})
890906

907+
Context("when nimservice only supports /v1/metadata", func() {
908+
It("should succeed when nimservice only supports /v1/metadata", func() {
909+
testServer.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
910+
switch r.URL.Path {
911+
case "/v1/models":
912+
w.WriteHeader(http.StatusNotFound)
913+
case "/v1/metadata":
914+
w.WriteHeader(http.StatusOK)
915+
_, err := w.Write([]byte(`{"modelInfo": [{"shortName": "dummy-model:dummy-version", "modelUrl": "ngc://org/team/dummy-model:dummy-version"}]}`))
916+
Expect(err).ToNot(HaveOccurred())
917+
default:
918+
w.WriteHeader(http.StatusNotFound)
919+
}
920+
})
921+
svc := &corev1.Service{
922+
ObjectMeta: metav1.ObjectMeta{
923+
Name: "test-nimservice",
924+
Namespace: "default",
925+
},
926+
Spec: corev1.ServiceSpec{
927+
Type: corev1.ServiceTypeClusterIP,
928+
ClusterIP: "127.0.0.1",
929+
Ports: []corev1.ServicePort{
930+
{
931+
Port: 8123,
932+
Name: "service-port",
933+
},
934+
},
935+
},
936+
}
937+
_ = client.Create(context.TODO(), svc)
938+
err := reconciler.updateModelStatus(context.Background(), nimService)
939+
Expect(err).ToNot(HaveOccurred())
940+
modelStatus := nimService.Status.Model
941+
Expect(modelStatus).ToNot(BeNil())
942+
Expect(modelStatus.ClusterEndpoint).To(Equal("127.0.0.1:8123"))
943+
Expect(modelStatus.ExternalEndpoint).To(Equal("test-nimservice.default.example.com"))
944+
Expect(modelStatus.Name).To(Equal("dummy-model"))
945+
946+
})
947+
})
891948
})
892949

893950
Describe("getNIMModelEndpoints", func() {

internal/nimmodels/errors.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
Copyright 2025.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package nimmodels
18+
19+
import (
20+
"net/http"
21+
)
22+
23+
type APIError struct {
24+
StatusCode int
25+
Status string
26+
}
27+
28+
func (e APIError) Error() string {
29+
return e.Status
30+
}
31+
32+
func newAPIError(resp *http.Response) *APIError {
33+
return &APIError{
34+
StatusCode: resp.StatusCode,
35+
Status: resp.Status,
36+
}
37+
}
38+
39+
func IsNotFound(err error) bool {
40+
if err == nil {
41+
return false
42+
}
43+
apiErr, ok := err.(*APIError)
44+
return ok && (apiErr.StatusCode == http.StatusNotFound)
45+
}

internal/nimmodels/errors_test.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package nimmodels
2+
3+
import (
4+
"fmt"
5+
"net/http"
6+
"testing"
7+
)
8+
9+
func TestIsNotFound(t *testing.T) {
10+
tests := []struct {
11+
description string
12+
err error
13+
expected bool
14+
}{
15+
{
16+
description: "no error",
17+
err: nil,
18+
expected: false,
19+
},
20+
{
21+
description: "404 HTTP StatusCode",
22+
err: &APIError{StatusCode: http.StatusNotFound, Status: "404 Not Found"},
23+
expected: true,
24+
},
25+
{
26+
description: "500 HTTP StatusCode",
27+
err: &APIError{StatusCode: http.StatusInternalServerError, Status: "500 Internal Server Error"},
28+
expected: false,
29+
},
30+
{
31+
description: "generic error",
32+
err: fmt.Errorf("some other error"),
33+
expected: false,
34+
},
35+
}
36+
37+
for _, tt := range tests {
38+
t.Run(tt.description, func(t *testing.T) {
39+
result := IsNotFound(tt.err)
40+
if result != tt.expected {
41+
t.Errorf("IsNotFound() = %v, want %v", result, tt.expected)
42+
}
43+
})
44+
}
45+
}

0 commit comments

Comments
 (0)