Skip to content

Commit 78c772a

Browse files
[feat] Add status of model for AAA page (opendatahub-io#4953)
This PR adds logic to fetch model status from the inference service. Also updates the relevant OpenAI spec and mock endpoint. Test: curl -v -H "Authorization: Bearer xyz" \ "http://localhost:8080/gen-ai/api/v1/aa/models?namespace=genai-lls" { "data": [ { "model_name": "llama-32-3b-instruct", "serving_runtime": "vLLM CPU (amd64 - EXPERIMENTAL) ServingRuntime for KServe", "api_protocol": "REST", "version": "v0.9.1.0", "usecase": "", "description": "", "endpoints": [ "internal: https://llama-32-3b-instruct-predictor.genai-lls.svc.cluster.local", "external: http://llama-32-3b-instruct-predictor.genai-lls.svc.cluster.local" ], "status": "Error" } ] } Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
1 parent 78ac036 commit 78c772a

File tree

6 files changed

+242
-1
lines changed

6 files changed

+242
-1
lines changed

packages/gen-ai/bff/internal/api/models_aa_handler_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ func TestModelsAAHandler(t *testing.T) {
9999
assert.Equal(t, "v2025.1", firstModel["version"])
100100
assert.Equal(t, "Code generation", firstModel["usecase"])
101101
assert.Equal(t, "IBM Granite 7B model specialized for code generation tasks", firstModel["description"])
102+
assert.Equal(t, "Running", firstModel["status"])
102103

103104
// Check endpoints array
104105
endpoints, ok := firstModel["endpoints"].([]interface{})
@@ -116,6 +117,7 @@ func TestModelsAAHandler(t *testing.T) {
116117
assert.Equal(t, "v2025.1", secondModel["version"])
117118
assert.Equal(t, "General chat", secondModel["usecase"])
118119
assert.Equal(t, "Meta Llama 3.1 8B parameter model optimized for instruction following", secondModel["description"])
120+
assert.Equal(t, "Running", secondModel["status"])
119121

120122
// Check second model endpoints
121123
secondEndpoints, ok := secondModel["endpoints"].([]interface{})
@@ -133,12 +135,35 @@ func TestModelsAAHandler(t *testing.T) {
133135
assert.Equal(t, "v2025.1", thirdModel["version"])
134136
assert.Equal(t, "Multilingual, Reasoning", thirdModel["usecase"])
135137
assert.Equal(t, "Mistral 7B instruction-tuned model for general purpose tasks", thirdModel["description"])
138+
assert.Equal(t, "Running", thirdModel["status"])
136139

137140
// Check third model endpoints
138141
thirdEndpoints, ok := thirdModel["endpoints"].([]interface{})
139142
assert.True(t, ok, "Third model endpoints should be an array")
140143
assert.Len(t, thirdEndpoints, 1, "Should have 1 endpoint")
141144
assert.Equal(t, "internal: http://mistral-7b-instruct.mock-test-namespace-2.svc.cluster.local:8080", thirdEndpoints[0])
145+
146+
// Check fourth model (ollama/llama3.2:3b)
147+
fourthModel, ok := dataArray[3].(map[string]interface{})
148+
assert.True(t, ok, "Fourth model should be a map")
149+
assert.Equal(t, "ollama/llama3.2:3b", fourthModel["model_name"])
150+
assert.Equal(t, "Ollama", fourthModel["serving_runtime"])
151+
assert.Equal(t, "v1", fourthModel["api_protocol"])
152+
assert.Equal(t, "v2025.1", fourthModel["version"])
153+
assert.Equal(t, "General chat, Code generation", fourthModel["usecase"])
154+
assert.Equal(t, "Meta Llama 3.2 3B parameter model optimized for efficiency and performance", fourthModel["description"])
155+
assert.Equal(t, "Running", fourthModel["status"])
156+
157+
// Check fifth model (ollama/all-minilm:l6-v2)
158+
fifthModel, ok := dataArray[4].(map[string]interface{})
159+
assert.True(t, ok, "Fifth model should be a map")
160+
assert.Equal(t, "ollama/all-minilm:l6-v2", fifthModel["model_name"])
161+
assert.Equal(t, "Ollama", fifthModel["serving_runtime"])
162+
assert.Equal(t, "v1", fifthModel["api_protocol"])
163+
assert.Equal(t, "v2025.1", fifthModel["version"])
164+
assert.Equal(t, "Embeddings, Semantic search", fifthModel["usecase"])
165+
assert.Equal(t, "Microsoft All-MiniLM-L6-v2 embedding model for semantic search and text similarity", fifthModel["description"])
166+
assert.Equal(t, "Stop", fifthModel["status"])
142167
})
143168

144169
// Test error cases - simple parameter validation

packages/gen-ai/bff/internal/api/models_handler_test.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,16 @@ import (
88
"net/http/httptest"
99
"testing"
1010

11+
kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
1112
"github.com/opendatahub-io/gen-ai/internal/config"
1213
"github.com/opendatahub-io/gen-ai/internal/constants"
14+
"github.com/opendatahub-io/gen-ai/internal/integrations/kubernetes"
1315
"github.com/opendatahub-io/gen-ai/internal/integrations/llamastack/lsmocks"
1416
"github.com/opendatahub-io/gen-ai/internal/repositories"
1517
"github.com/opendatahub-io/gen-ai/internal/testutil"
1618
"github.com/stretchr/testify/assert"
19+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
20+
duckv1 "knative.dev/pkg/apis/duck/v1"
1721
)
1822

1923
func TestLlamaStackModelsHandler(t *testing.T) {
@@ -127,3 +131,93 @@ func TestLlamaStackModelsHandler(t *testing.T) {
127131
assert.Equal(t, "ollama/llama3.2:3b", firstModel["id"])
128132
})
129133
}
134+
135+
func TestInferenceServiceStatusExtraction(t *testing.T) {
136+
// Test the status extraction logic directly
137+
t.Run("should return Running status when Ready condition is True", func(t *testing.T) {
138+
// Create a ready InferenceService
139+
readyISVC := &kservev1beta1.InferenceService{
140+
ObjectMeta: metav1.ObjectMeta{
141+
Name: "test-ready-model",
142+
},
143+
Status: kservev1beta1.InferenceServiceStatus{
144+
Status: duckv1.Status{
145+
Conditions: duckv1.Conditions{
146+
{
147+
Type: "Ready",
148+
Status: "True",
149+
},
150+
},
151+
},
152+
},
153+
}
154+
155+
// Test the status extraction directly
156+
status := kubernetes.ExtractStatusFromInferenceService(readyISVC)
157+
assert.Equal(t, "Running", status)
158+
})
159+
160+
t.Run("should return Stop status when Ready condition is False", func(t *testing.T) {
161+
// Create a not-ready InferenceService
162+
notReadyISVC := &kservev1beta1.InferenceService{
163+
ObjectMeta: metav1.ObjectMeta{
164+
Name: "test-error-model",
165+
},
166+
Status: kservev1beta1.InferenceServiceStatus{
167+
Status: duckv1.Status{
168+
Conditions: duckv1.Conditions{
169+
{
170+
Type: "Ready",
171+
Status: "False",
172+
},
173+
},
174+
},
175+
},
176+
}
177+
178+
// Test the status extraction directly
179+
status := kubernetes.ExtractStatusFromInferenceService(notReadyISVC)
180+
assert.Equal(t, "Stop", status)
181+
})
182+
183+
t.Run("should return Stop status when no Ready condition exists", func(t *testing.T) {
184+
// Create an InferenceService without Ready condition
185+
noReadyISVC := &kservev1beta1.InferenceService{
186+
ObjectMeta: metav1.ObjectMeta{
187+
Name: "test-no-ready-model",
188+
},
189+
Status: kservev1beta1.InferenceServiceStatus{
190+
Status: duckv1.Status{
191+
Conditions: duckv1.Conditions{
192+
{
193+
Type: "PredictorReady",
194+
Status: "True",
195+
},
196+
},
197+
},
198+
},
199+
}
200+
201+
// Test the status extraction directly
202+
status := kubernetes.ExtractStatusFromInferenceService(noReadyISVC)
203+
assert.Equal(t, "Stop", status)
204+
})
205+
206+
t.Run("should return Stop status when no conditions exist", func(t *testing.T) {
207+
// Create an InferenceService with no conditions
208+
noConditionsISVC := &kservev1beta1.InferenceService{
209+
ObjectMeta: metav1.ObjectMeta{
210+
Name: "test-no-conditions-model",
211+
},
212+
Status: kservev1beta1.InferenceServiceStatus{
213+
Status: duckv1.Status{
214+
Conditions: duckv1.Conditions{},
215+
},
216+
},
217+
}
218+
219+
// Test the status extraction directly
220+
status := kubernetes.ExtractStatusFromInferenceService(noConditionsISVC)
221+
assert.Equal(t, "Stop", status)
222+
})
223+
}

packages/gen-ai/bff/internal/integrations/kubernetes/k8smocks/token_k8s_client_mock.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ func (m *TokenKubernetesClientMock) GetAAModels(ctx context.Context, identity *i
8282
fmt.Sprintf("internal: http://granite-7b-code.%s.svc.cluster.local:8080", namespace),
8383
fmt.Sprintf("external: https://granite-7b-code-%s.example.com", namespace),
8484
},
85+
Status: "Running",
8586
},
8687
{
8788
ModelName: "llama-3.1-8b-instruct",
@@ -94,6 +95,7 @@ func (m *TokenKubernetesClientMock) GetAAModels(ctx context.Context, identity *i
9495
fmt.Sprintf("internal: http://llama-3.1-8b-instruct.%s.svc.cluster.local:8080", namespace),
9596
fmt.Sprintf("external: https://llama-3.1-8b-instruct-%s.example.com", namespace),
9697
},
98+
Status: "Running",
9799
},
98100
{
99101
ModelName: "mistral-7b-instruct",
@@ -105,6 +107,7 @@ func (m *TokenKubernetesClientMock) GetAAModels(ctx context.Context, identity *i
105107
Endpoints: []string{
106108
fmt.Sprintf("internal: http://mistral-7b-instruct.%s.svc.cluster.local:8080", namespace),
107109
},
110+
Status: "Running",
108111
},
109112
{
110113
ModelName: "ollama/llama3.2:3b",
@@ -117,6 +120,7 @@ func (m *TokenKubernetesClientMock) GetAAModels(ctx context.Context, identity *i
117120
fmt.Sprintf("internal: http://llama3.2-3b.%s.svc.cluster.local:11434", namespace),
118121
fmt.Sprintf("external: https://llama3.2-3b-%s.example.com", namespace),
119122
},
123+
Status: "Running",
120124
},
121125
{
122126
ModelName: "ollama/all-minilm:l6-v2",
@@ -129,10 +133,10 @@ func (m *TokenKubernetesClientMock) GetAAModels(ctx context.Context, identity *i
129133
fmt.Sprintf("internal: http://all-minilm-l6-v2.%s.svc.cluster.local:11434", namespace),
130134
fmt.Sprintf("external: https://all-minilm-l6-v2-%s.example.com", namespace),
131135
},
136+
Status: "Stop",
132137
},
133138
}, nil
134139
default:
135-
// Return empty array for unknown namespaces
136140
return []genaiassets.AAModel{}, nil
137141
}
138142
}

packages/gen-ai/bff/internal/integrations/kubernetes/token_k8s_client.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ func (kc *TokenKubernetesClient) GetAAModels(ctx context.Context, identity *inte
293293
Description: kc.extractDescriptionFromInferenceService(&isvc),
294294
Usecase: kc.extractUseCaseFromInferenceService(&isvc),
295295
Endpoints: kc.extractEndpoints(&isvc),
296+
Status: kc.extractStatusFromInferenceService(&isvc),
296297
}
297298
aaModels = append(aaModels, aaModel)
298299
}
@@ -384,6 +385,23 @@ func (kc *TokenKubernetesClient) extractEndpoints(isvc *kservev1beta1.InferenceS
384385
return endpoints
385386
}
386387

388+
// extractStatusFromInferenceService consolidates all KServe status information into "Running" or "Stop"
389+
func (kc *TokenKubernetesClient) extractStatusFromInferenceService(isvc *kservev1beta1.InferenceService) string {
390+
return ExtractStatusFromInferenceService(isvc)
391+
}
392+
393+
// ExtractStatusFromInferenceService is a pure function that extracts status from InferenceService
394+
// This function is exported for testing purposes
395+
func ExtractStatusFromInferenceService(isvc *kservev1beta1.InferenceService) string {
396+
// Simply check if the overall service is ready
397+
for _, condition := range isvc.Status.Conditions {
398+
if condition.Type == "Ready" && condition.Status == "True" {
399+
return "Running"
400+
}
401+
}
402+
return "Stop"
403+
}
404+
387405
func (kc *TokenKubernetesClient) InstallLlamaStackDistribution(ctx context.Context, identity *integrations.RequestIdentity, namespace string, models []string) (*lsdapi.LlamaStackDistribution, error) {
388406
ctx, cancel := context.WithTimeout(ctx, 60*time.Second)
389407
defer cancel()

packages/gen-ai/bff/internal/models/genaiassets/aa_models.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ type AAModel struct {
88
Usecase string `json:"usecase"`
99
Description string `json:"description"`
1010
Endpoints []string `json:"endpoints"`
11+
Status string `json:"status"`
1112
}

packages/gen-ai/bff/openapi/src/gen-ai.yaml

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,38 @@ paths:
6565
summary: List All Models
6666
description: Gets a list of all available AI models from Llama Stack.
6767

68+
/gen-ai/api/v1/aa/models:
69+
summary: List AI Assets models
70+
description: >-
71+
Lists all available AI Assets models from Kubernetes InferenceServices.
72+
Returns models with their serving runtime, API protocol, version, and status information.
73+
Requires namespace parameter for proper multi-tenant isolation.
74+
get:
75+
tags:
76+
- Models
77+
security:
78+
- Bearer: []
79+
parameters:
80+
- name: namespace
81+
in: query
82+
description: Kubernetes namespace to query for AI Assets models
83+
required: true
84+
schema:
85+
type: string
86+
example: "genai-lls"
87+
responses:
88+
'200':
89+
$ref: '#/components/responses/AAModelsResponse'
90+
'400':
91+
$ref: '#/components/responses/BadRequest'
92+
'401':
93+
$ref: '#/components/responses/Unauthorized'
94+
'500':
95+
$ref: '#/components/responses/InternalServerError'
96+
operationId: listAAModels
97+
summary: List AI Assets Models
98+
description: Gets a list of all available AI Assets models from Kubernetes InferenceServices.
99+
68100
/gen-ai/api/v1/vectorstores:
69101
summary: Manage vector stores for RAG
70102
description: >-
@@ -666,12 +698,67 @@ components:
666698
example: 'llama_stack'
667699
description: Model owner/provider
668700

701+
# AI Assets Model Schema
702+
AAModel:
703+
type: object
704+
required:
705+
- model_name
706+
- serving_runtime
707+
- api_protocol
708+
- version
709+
- endpoints
710+
- status
711+
properties:
712+
model_name:
713+
type: string
714+
example: 'llama-32-3b-instruct'
715+
description: Name of the AI model
716+
serving_runtime:
717+
type: string
718+
example: 'OpenVINO Model Server'
719+
description: Runtime used to serve the model
720+
api_protocol:
721+
type: string
722+
example: 'v2'
723+
description: API protocol version used by the model
724+
version:
725+
type: string
726+
example: 'v2025.1'
727+
description: Version of the model
728+
description:
729+
type: string
730+
nullable: true
731+
example: 'A high-performance computer vision model for object detection and classification'
732+
description: Human-readable description of the model
733+
usecase:
734+
type: string
735+
nullable: true
736+
example: 'Computer Vision'
737+
description: Primary use case for the model
738+
endpoints:
739+
type: array
740+
items:
741+
type: string
742+
example: ['internal: http://llama-32-3b-instruct.genai-lls.svc.cluster.local:8080']
743+
description: List of available endpoints for the model
744+
status:
745+
type: string
746+
enum: ['Running', 'Stop']
747+
example: 'Running'
748+
description: Current status of the model (Running or Stop)
749+
669750
ModelsData:
670751
type: array
671752
items:
672753
$ref: '#/components/schemas/Model'
673754
description: Array of available models
674755

756+
AAModelsData:
757+
type: array
758+
items:
759+
$ref: '#/components/schemas/AAModel'
760+
description: Array of available AI Assets models
761+
675762
# Vector Stores Schema
676763
VectorStore:
677764
type: object
@@ -2012,6 +2099,18 @@ components:
20122099
data:
20132100
$ref: '#/components/schemas/ModelsData'
20142101

2102+
AAModelsResponse:
2103+
description: List of available AI Assets models
2104+
content:
2105+
application/json:
2106+
schema:
2107+
type: object
2108+
required:
2109+
- data
2110+
properties:
2111+
data:
2112+
$ref: '#/components/schemas/AAModelsData'
2113+
20152114
VectorStoresResponse:
20162115
description: List of vector stores with pagination support
20172116
content:

0 commit comments

Comments
 (0)