Skip to content

Commit 78c772a

Browse files
[feat] Add status of model for AAA page (opendatahub-io#4953)
This PR adds logic to fetch model status from the inference service. Also updates the relevant OpenAI spec and mock endpoint. Test: curl -v -H "Authorization: Bearer xyz" \ "http://localhost:8080/gen-ai/api/v1/aa/models?namespace=genai-lls" { "data": [ { "model_name": "llama-32-3b-instruct", "serving_runtime": "vLLM CPU (amd64 - EXPERIMENTAL) ServingRuntime for KServe", "api_protocol": "REST", "version": "v0.9.1.0", "usecase": "", "description": "", "endpoints": [ "internal: https://llama-32-3b-instruct-predictor.genai-lls.svc.cluster.local", "external: http://llama-32-3b-instruct-predictor.genai-lls.svc.cluster.local" ], "status": "Error" } ] } Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
1 parent 78ac036 commit 78c772a

File tree

6 files changed

+242
-1
lines changed

6 files changed

+242
-1
lines changed

packages/gen-ai/bff/internal/api/models_aa_handler_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ func TestModelsAAHandler(t *testing.T) {
9999
assert.Equal(t, "v2025.1", firstModel["version"])
100100
assert.Equal(t, "Code generation", firstModel["usecase"])
101101
assert.Equal(t, "IBM Granite 7B model specialized for code generation tasks", firstModel["description"])
102+
assert.Equal(t, "Running", firstModel["status"])
102103

103104
// Check endpoints array
104105
endpoints, ok := firstModel["endpoints"].([]interface{})
@@ -116,6 +117,7 @@ func TestModelsAAHandler(t *testing.T) {
116117
assert.Equal(t, "v2025.1", secondModel["version"])
117118
assert.Equal(t, "General chat", secondModel["usecase"])
118119
assert.Equal(t, "Meta Llama 3.1 8B parameter model optimized for instruction following", secondModel["description"])
120+
assert.Equal(t, "Running", secondModel["status"])
119121

120122
// Check second model endpoints
121123
secondEndpoints, ok := secondModel["endpoints"].([]interface{})
@@ -133,12 +135,35 @@ func TestModelsAAHandler(t *testing.T) {
133135
assert.Equal(t, "v2025.1", thirdModel["version"])
134136
assert.Equal(t, "Multilingual, Reasoning", thirdModel["usecase"])
135137
assert.Equal(t, "Mistral 7B instruction-tuned model for general purpose tasks", thirdModel["description"])
138+
assert.Equal(t, "Running", thirdModel["status"])
136139

137140
// Check third model endpoints
138141
thirdEndpoints, ok := thirdModel["endpoints"].([]interface{})
139142
assert.True(t, ok, "Third model endpoints should be an array")
140143
assert.Len(t, thirdEndpoints, 1, "Should have 1 endpoint")
141144
assert.Equal(t, "internal: http://mistral-7b-instruct.mock-test-namespace-2.svc.cluster.local:8080", thirdEndpoints[0])
145+
146+
// Check fourth model (ollama/llama3.2:3b)
147+
fourthModel, ok := dataArray[3].(map[string]interface{})
148+
assert.True(t, ok, "Fourth model should be a map")
149+
assert.Equal(t, "ollama/llama3.2:3b", fourthModel["model_name"])
150+
assert.Equal(t, "Ollama", fourthModel["serving_runtime"])
151+
assert.Equal(t, "v1", fourthModel["api_protocol"])
152+
assert.Equal(t, "v2025.1", fourthModel["version"])
153+
assert.Equal(t, "General chat, Code generation", fourthModel["usecase"])
154+
assert.Equal(t, "Meta Llama 3.2 3B parameter model optimized for efficiency and performance", fourthModel["description"])
155+
assert.Equal(t, "Running", fourthModel["status"])
156+
157+
// Check fifth model (ollama/all-minilm:l6-v2)
158+
fifthModel, ok := dataArray[4].(map[string]interface{})
159+
assert.True(t, ok, "Fifth model should be a map")
160+
assert.Equal(t, "ollama/all-minilm:l6-v2", fifthModel["model_name"])
161+
assert.Equal(t, "Ollama", fifthModel["serving_runtime"])
162+
assert.Equal(t, "v1", fifthModel["api_protocol"])
163+
assert.Equal(t, "v2025.1", fifthModel["version"])
164+
assert.Equal(t, "Embeddings, Semantic search", fifthModel["usecase"])
165+
assert.Equal(t, "Microsoft All-MiniLM-L6-v2 embedding model for semantic search and text similarity", fifthModel["description"])
166+
assert.Equal(t, "Stop", fifthModel["status"])
142167
})
143168

144169
// Test error cases - simple parameter validation

packages/gen-ai/bff/internal/api/models_handler_test.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,16 @@ import (
88
"net/http/httptest"
99
"testing"
1010

11+
kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
1112
"github.com/opendatahub-io/gen-ai/internal/config"
1213
"github.com/opendatahub-io/gen-ai/internal/constants"
14+
"github.com/opendatahub-io/gen-ai/internal/integrations/kubernetes"
1315
"github.com/opendatahub-io/gen-ai/internal/integrations/llamastack/lsmocks"
1416
"github.com/opendatahub-io/gen-ai/internal/repositories"
1517
"github.com/opendatahub-io/gen-ai/internal/testutil"
1618
"github.com/stretchr/testify/assert"
19+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
20+
duckv1 "knative.dev/pkg/apis/duck/v1"
1721
)
1822

1923
func TestLlamaStackModelsHandler(t *testing.T) {
@@ -127,3 +131,93 @@ func TestLlamaStackModelsHandler(t *testing.T) {
127131
assert.Equal(t, "ollama/llama3.2:3b", firstModel["id"])
128132
})
129133
}
134+
135+
func TestInferenceServiceStatusExtraction(t *testing.T) {
136+
// Test the status extraction logic directly
137+
t.Run("should return Running status when Ready condition is True", func(t *testing.T) {
138+
// Create a ready InferenceService
139+
readyISVC := &kservev1beta1.InferenceService{
140+
ObjectMeta: metav1.ObjectMeta{
141+
Name: "test-ready-model",
142+
},
143+
Status: kservev1beta1.InferenceServiceStatus{
144+
Status: duckv1.Status{
145+
Conditions: duckv1.Conditions{
146+
{
147+
Type: "Ready",
148+
Status: "True",
149+
},
150+
},
151+
},
152+
},
153+
}
154+
155+
// Test the status extraction directly
156+
status := kubernetes.ExtractStatusFromInferenceService(readyISVC)
157+
assert.Equal(t, "Running", status)
158+
})
159+
160+
t.Run("should return Stop status when Ready condition is False", func(t *testing.T) {
161+
// Create a not-ready InferenceService
162+
notReadyISVC := &kservev1beta1.InferenceService{
163+
ObjectMeta: metav1.ObjectMeta{
164+
Name: "test-error-model",
165+
},
166+
Status: kservev1beta1.InferenceServiceStatus{
167+
Status: duckv1.Status{
168+
Conditions: duckv1.Conditions{
169+
{
170+
Type: "Ready",
171+
Status: "False",
172+
},
173+
},
174+
},
175+
},
176+
}
177+
178+
// Test the status extraction directly
179+
status := kubernetes.ExtractStatusFromInferenceService(notReadyISVC)
180+
assert.Equal(t, "Stop", status)
181+
})
182+
183+
t.Run("should return Stop status when no Ready condition exists", func(t *testing.T) {
184+
// Create an InferenceService without Ready condition
185+
noReadyISVC := &kservev1beta1.InferenceService{
186+
ObjectMeta: metav1.ObjectMeta{
187+
Name: "test-no-ready-model",
188+
},
189+
Status: kservev1beta1.InferenceServiceStatus{
190+
Status: duckv1.Status{
191+
Conditions: duckv1.Conditions{
192+
{
193+
Type: "PredictorReady",
194+
Status: "True",
195+
},
196+
},
197+
},
198+
},
199+
}
200+
201+
// Test the status extraction directly
202+
status := kubernetes.ExtractStatusFromInferenceService(noReadyISVC)
203+
assert.Equal(t, "Stop", status)
204+
})
205+
206+
t.Run("should return Stop status when no conditions exist", func(t *testing.T) {
207+
// Create an InferenceService with no conditions
208+
noConditionsISVC := &kservev1beta1.InferenceService{
209+
ObjectMeta: metav1.ObjectMeta{
210+
Name: "test-no-conditions-model",
211+
},
212+
Status: kservev1beta1.InferenceServiceStatus{
213+
Status: duckv1.Status{
214+
Conditions: duckv1.Conditions{},
215+
},
216+
},
217+
}
218+
219+
// Test the status extraction directly
220+
status := kubernetes.ExtractStatusFromInferenceService(noConditionsISVC)
221+
assert.Equal(t, "Stop", status)
222+
})
223+
}

packages/gen-ai/bff/internal/integrations/kubernetes/k8smocks/token_k8s_client_mock.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ func (m *TokenKubernetesClientMock) GetAAModels(ctx context.Context, identity *i
8282
fmt.Sprintf("internal: http://granite-7b-code.%s.svc.cluster.local:8080", namespace),
8383
fmt.Sprintf("external: https://granite-7b-code-%s.example.com", namespace),
8484
},
85+
Status: "Running",
8586
},
8687
{
8788
ModelName: "llama-3.1-8b-instruct",
@@ -94,6 +95,7 @@ func (m *TokenKubernetesClientMock) GetAAModels(ctx context.Context, identity *i
9495
fmt.Sprintf("internal: http://llama-3.1-8b-instruct.%s.svc.cluster.local:8080", namespace),
9596
fmt.Sprintf("external: https://llama-3.1-8b-instruct-%s.example.com", namespace),
9697
},
98+
Status: "Running",
9799
},
98100
{
99101
ModelName: "mistral-7b-instruct",
@@ -105,6 +107,7 @@ func (m *TokenKubernetesClientMock) GetAAModels(ctx context.Context, identity *i
105107
Endpoints: []string{
106108
fmt.Sprintf("internal: http://mistral-7b-instruct.%s.svc.cluster.local:8080", namespace),
107109
},
110+
Status: "Running",
108111
},
109112
{
110113
ModelName: "ollama/llama3.2:3b",
@@ -117,6 +120,7 @@ func (m *TokenKubernetesClientMock) GetAAModels(ctx context.Context, identity *i
117120
fmt.Sprintf("internal: http://llama3.2-3b.%s.svc.cluster.local:11434", namespace),
118121
fmt.Sprintf("external: https://llama3.2-3b-%s.example.com", namespace),
119122
},
123+
Status: "Running",
120124
},
121125
{
122126
ModelName: "ollama/all-minilm:l6-v2",
@@ -129,10 +133,10 @@ func (m *TokenKubernetesClientMock) GetAAModels(ctx context.Context, identity *i
129133
fmt.Sprintf("internal: http://all-minilm-l6-v2.%s.svc.cluster.local:11434", namespace),
130134
fmt.Sprintf("external: https://all-minilm-l6-v2-%s.example.com", namespace),
131135
},
136+
Status: "Stop",
132137
},
133138
}, nil
134139
default:
135-
// Return empty array for unknown namespaces
136140
return []genaiassets.AAModel{}, nil
137141
}
138142
}

packages/gen-ai/bff/internal/integrations/kubernetes/token_k8s_client.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ func (kc *TokenKubernetesClient) GetAAModels(ctx context.Context, identity *inte
293293
Description: kc.extractDescriptionFromInferenceService(&isvc),
294294
Usecase: kc.extractUseCaseFromInferenceService(&isvc),
295295
Endpoints: kc.extractEndpoints(&isvc),
296+
Status: kc.extractStatusFromInferenceService(&isvc),
296297
}
297298
aaModels = append(aaModels, aaModel)
298299
}
@@ -384,6 +385,23 @@ func (kc *TokenKubernetesClient) extractEndpoints(isvc *kservev1beta1.InferenceS
384385
return endpoints
385386
}
386387

388+
// extractStatusFromInferenceService consolidates all KServe status information into "Running" or "Stop"
389+
func (kc *TokenKubernetesClient) extractStatusFromInferenceService(isvc *kservev1beta1.InferenceService) string {
390+
return ExtractStatusFromInferenceService(isvc)
391+
}
392+
393+
// ExtractStatusFromInferenceService is a pure function that extracts status from InferenceService
394+
// This function is exported for testing purposes
395+
func ExtractStatusFromInferenceService(isvc *kservev1beta1.InferenceService) string {
396+
// Simply check if the overall service is ready
397+
for _, condition := range isvc.Status.Conditions {
398+
if condition.Type == "Ready" && condition.Status == "True" {
399+
return "Running"
400+
}
401+
}
402+
return "Stop"
403+
}
404+
387405
func (kc *TokenKubernetesClient) InstallLlamaStackDistribution(ctx context.Context, identity *integrations.RequestIdentity, namespace string, models []string) (*lsdapi.LlamaStackDistribution, error) {
388406
ctx, cancel := context.WithTimeout(ctx, 60*time.Second)
389407
defer cancel()

packages/gen-ai/bff/internal/models/genaiassets/aa_models.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ type AAModel struct {
88
Usecase string `json:"usecase"`
99
Description string `json:"description"`
1010
Endpoints []string `json:"endpoints"`
11+
Status string `json:"status"`
1112
}

packages/gen-ai/bff/openapi/src/gen-ai.yaml

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,38 @@ paths:
6565
summary: List All Models
6666
description: Gets a list of all available AI models from Llama Stack.
6767

68+
/gen-ai/api/v1/aa/models:
69+
summary: List AI Assets models
70+
description: >-
71+
Lists all available AI Assets models from Kubernetes InferenceServices.
72+
Returns models with their serving runtime, API protocol, version, and status information.
73+
Requires namespace parameter for proper multi-tenant isolation.
74+
get:
75+
tags:
76+
- Models
77+
security:
78+
- Bearer: []
79+
parameters:
80+
- name: namespace
81+
in: query
82+
description: Kubernetes namespace to query for AI Assets models
83+
required: true
84+
schema:
85+
type: string
86+
example: "genai-lls"
87+
responses:
88+
'200':
89+
$ref: '#/components/responses/AAModelsResponse'
90+
'400':
91+
$ref: '#/components/responses/BadRequest'
92+
'401':
93+
$ref: '#/components/responses/Unauthorized'
94+
'500':
95+
$ref: '#/components/responses/InternalServerError'
96+
operationId: listAAModels
97+
summary: List AI Assets Models
98+
description: Gets a list of all available AI Assets models from Kubernetes InferenceServices.
99+
68100
/gen-ai/api/v1/vectorstores:
69101
summary: Manage vector stores for RAG
70102
description: >-
@@ -666,12 +698,67 @@ components:
666698
example: 'llama_stack'
667699
description: Model owner/provider
668700

701+
# AI Assets Model Schema
702+
AAModel:
703+
type: object
704+
required:
705+
- model_name
706+
- serving_runtime
707+
- api_protocol
708+
- version
709+
- endpoints
710+
- status
711+
properties:
712+
model_name:
713+
type: string
714+
example: 'llama-32-3b-instruct'
715+
description: Name of the AI model
716+
serving_runtime:
717+
type: string
718+
example: 'OpenVINO Model Server'
719+
description: Runtime used to serve the model
720+
api_protocol:
721+
type: string
722+
example: 'v2'
723+
description: API protocol version used by the model
724+
version:
725+
type: string
726+
example: 'v2025.1'
727+
description: Version of the model
728+
description:
729+
type: string
730+
nullable: true
731+
example: 'A high-performance computer vision model for object detection and classification'
732+
description: Human-readable description of the model
733+
usecase:
734+
type: string
735+
nullable: true
736+
example: 'Computer Vision'
737+
description: Primary use case for the model
738+
endpoints:
739+
type: array
740+
items:
741+
type: string
742+
example: ['internal: http://llama-32-3b-instruct.genai-lls.svc.cluster.local:8080']
743+
description: List of available endpoints for the model
744+
status:
745+
type: string
746+
enum: ['Running', 'Stop']
747+
example: 'Running'
748+
description: Current status of the model (Running or Stop)
749+
669750
ModelsData:
670751
type: array
671752
items:
672753
$ref: '#/components/schemas/Model'
673754
description: Array of available models
674755

756+
AAModelsData:
757+
type: array
758+
items:
759+
$ref: '#/components/schemas/AAModel'
760+
description: Array of available AI Assets models
761+
675762
# Vector Stores Schema
676763
VectorStore:
677764
type: object
@@ -2012,6 +2099,18 @@ components:
20122099
data:
20132100
$ref: '#/components/schemas/ModelsData'
20142101

2102+
AAModelsResponse:
2103+
description: List of available AI Assets models
2104+
content:
2105+
application/json:
2106+
schema:
2107+
type: object
2108+
required:
2109+
- data
2110+
properties:
2111+
data:
2112+
$ref: '#/components/schemas/AAModelsData'
2113+
20152114
VectorStoresResponse:
20162115
description: List of vector stores with pagination support
20172116
content:

0 commit comments

Comments
 (0)