Skip to content

Commit 66f4831

Browse files
authored
View existing llmiscvconfig deployments (opendatahub-io#6800)
* Update modelServingSelectFieldData type
* Load existing llmconfig deployment
* View existing llmiscvconfig deployments
* Add cypress test
1 parent b5cf206 commit 66f4831

32 files changed

Lines changed: 895 additions & 195 deletions

frontend/src/__mocks__/mockLLMInferenceServiceConfigK8sResource.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ type MockLLMInferenceServiceConfigType = {
99
runtimeVersion?: string;
1010
modelUri?: string;
1111
modelName?: string;
12+
templateName?: string;
1213
};
1314

1415
export const mockLLMInferenceServiceConfigK8sResource = ({
@@ -20,6 +21,7 @@ export const mockLLMInferenceServiceConfigK8sResource = ({
2021
runtimeVersion = 'v0.9.1',
2122
modelUri = 'hf://test/model',
2223
modelName = 'test-model',
24+
templateName,
2325
}: MockLLMInferenceServiceConfigType): LLMInferenceServiceConfigKind => ({
2426
apiVersion: 'serving.kserve.io/v1alpha1',
2527
kind: 'LLMInferenceServiceConfig',
@@ -32,6 +34,7 @@ export const mockLLMInferenceServiceConfigK8sResource = ({
3234
...(recommendedAccelerators
3335
? { 'opendatahub.io/recommended-accelerators': recommendedAccelerators }
3436
: {}),
37+
...(templateName ? { 'opendatahub.io/template-name': templateName } : {}),
3538
},
3639
labels: {
3740
'opendatahub.io/config-type': configType,

frontend/src/__mocks__/mockLLMInferenceServiceK8sResource.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ type MockLLMInferenceServiceConfigType = {
2020
isNonDashboardItem?: boolean;
2121
modelType?: ServingRuntimeModelType;
2222
isStopped?: boolean;
23+
baseRefs?: { name?: string }[];
2324
};
2425

2526
export const mockLLMInferenceServiceK8sResource = ({
@@ -36,6 +37,7 @@ export const mockLLMInferenceServiceK8sResource = ({
3637
url,
3738
addresses,
3839
isStopped = false,
40+
baseRefs,
3941
}: MockLLMInferenceServiceConfigType): LLMInferenceServiceKind => ({
4042
apiVersion: 'serving.kserve.io/v1alpha1',
4143
kind: 'LLMInferenceService',
@@ -59,6 +61,7 @@ export const mockLLMInferenceServiceK8sResource = ({
5961
uid: genUID('llm-service'),
6062
},
6163
spec: {
64+
...(baseRefs && { baseRefs }),
6265
model: {
6366
name: modelName,
6467
uri: modelUri,

packages/cypress/cypress/tests/mocked/modelServing/modelServingLlmd.cy.ts

Lines changed: 72 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ const initIntercepts = ({
119119
mockK8sResourceList([mockProjectK8sResource({ enableKServe: true })]),
120120
);
121121
cy.interceptK8sList(LLMInferenceServiceModel, mockK8sResourceList(llmInferenceServices));
122+
cy.interceptK8sList(
123+
{ model: LLMInferenceServiceConfigModel, ns: 'test-project' },
124+
mockK8sResourceList([]),
125+
);
122126
cy.interceptK8sList(InferenceServiceModel, mockK8sResourceList(inferenceServices));
123127
cy.interceptK8sList(ServingRuntimeModel, mockK8sResourceList(servingRuntimes));
124128

@@ -630,9 +634,18 @@ describe('Model Serving LLMD', () => {
630634
});
631635
});
632636

633-
describe('Deploy vLLM using LLMInferenceServiceConfig', () => {
637+
describe('vLLM using LLMInferenceServiceConfig', () => {
634638
const initVLLMOnMaaSIntercepts = () => {
635-
initIntercepts({});
639+
initIntercepts({
640+
llmInferenceServices: [
641+
mockLLMInferenceServiceK8sResource({
642+
name: 'test-vllm-gpu',
643+
displayName: 'GPU vLLM Deployment',
644+
baseRefs: [{ name: 'test-vllm-gpu' }],
645+
modelType: ServingRuntimeModelType.GENERATIVE,
646+
}),
647+
],
648+
});
636649

637650
// Override config to enable vLLMDeploymentOnMaaS
638651
cy.interceptOdh(
@@ -652,19 +665,63 @@ describe('Model Serving LLMD', () => {
652665
mockK8sResourceList([
653666
mockLLMInferenceServiceConfigK8sResource({
654667
name: 'vllm-gaudi-config',
655-
displayName: 'vLLM on Gaudi',
668+
displayName: 'vLLM on Gaudi LLMInferenceServiceConfig',
656669
runtimeVersion: 'v0.9.1',
657670
}),
658671
mockLLMInferenceServiceConfigK8sResource({
659672
name: 'vllm-gpu-config',
660-
displayName: 'vLLM on GPU',
673+
displayName: 'vLLM on GPU LLMInferenceServiceConfig',
674+
runtimeVersion: 'v0.8.2',
675+
}),
676+
]),
677+
);
678+
679+
// Child config in project namespace — linked to the IS via matching name
680+
cy.interceptK8sList(
681+
{ model: LLMInferenceServiceConfigModel, ns: 'test-project' },
682+
mockK8sResourceList([
683+
mockLLMInferenceServiceConfigK8sResource({
684+
name: 'test-vllm-gpu',
685+
namespace: 'test-project',
686+
displayName: 'vLLM on GPU LLMInferenceServiceConfig',
661687
runtimeVersion: 'v0.8.2',
688+
templateName: 'vllm-gpu-config',
662689
}),
663690
]),
664691
);
692+
693+
cy.intercept('PUT', '**/llminferenceservices/test-vllm-gpu*', (req) => {
694+
req.reply({ statusCode: 200, body: req.body });
695+
}).as('updateLLMInferenceService');
665696
};
666697

667-
it('should show LLM config options when generative model type is selected with vLLMDeploymentOnMaaS enabled', () => {
698+
it('should display serving runtime name and version, then pre-fill when editing', () => {
699+
initVLLMOnMaaSIntercepts();
700+
701+
modelServingGlobal.visit('test-project');
702+
703+
// Verify the table shows the serving runtime name and version label
704+
const row = modelServingGlobal.getDeploymentRow('GPU vLLM Deployment');
705+
row.findServingRuntime().should('contain.text', 'vLLM on GPU LLMInferenceServiceConfig');
706+
row.findServingRuntimeVersionLabel().should('contain.text', 'v0.8.2');
707+
708+
// Open the edit wizard and verify the Serving runtime field is pre-filled on step 2
709+
modelServingGlobal.getModelRow('GPU vLLM Deployment').findKebabAction('Edit').click();
710+
711+
// Step 1: Model source — select URI, enter the model location, and proceed
712+
modelServingWizardEdit.findModelLocationSelectOption(ModelLocationSelectOption.URI).click();
713+
modelServingWizardEdit.findUrilocationInput().type('hf://facebook/opt-125m');
714+
modelServingWizardEdit.findSaveConnectionCheckbox().click();
715+
modelServingWizardEdit.findNextButton().should('be.enabled').click();
716+
717+
// Step 2: Verify the Serving runtime selector is pre-filled with the vLLM config name
718+
modelServingWizardEdit
719+
.findServingRuntimeTemplateSearchSelector()
720+
.should('be.disabled')
721+
.should('contain.text', 'vLLM on GPU LLMInferenceServiceConfig');
722+
});
723+
724+
it('Deploy vLLM using LLMInferenceServiceConfig', () => {
668725
initVLLMOnMaaSIntercepts();
669726

670727
modelServingGlobal.visit('test-project');
@@ -686,16 +743,22 @@ describe('Model Serving LLMD', () => {
686743
modelServingWizard
687744
.findGlobalScopedTemplateOption('Distributed inference with llm-d')
688745
.should('exist');
689-
modelServingWizard.findGlobalScopedTemplateOption('vLLM on Gaudi').should('exist');
690-
modelServingWizard.findGlobalScopedTemplateOption('vLLM on GPU').should('exist');
746+
modelServingWizard
747+
.findGlobalScopedTemplateOption('vLLM on Gaudi LLMInferenceServiceConfig')
748+
.should('exist');
749+
modelServingWizard
750+
.findGlobalScopedTemplateOption('vLLM on GPU LLMInferenceServiceConfig')
751+
.should('exist');
691752

692753
// Select a vLLM config option
693-
modelServingWizard.findGlobalScopedTemplateOption('vLLM on Gaudi').click();
754+
modelServingWizard
755+
.findGlobalScopedTemplateOption('vLLM on Gaudi LLMInferenceServiceConfig')
756+
.click();
694757

695758
// Verify the selected option is displayed
696759
modelServingWizard
697760
.findServingRuntimeTemplateSearchSelector()
698-
.should('contain.text', 'vLLM on Gaudi');
761+
.should('contain.text', 'vLLM on Gaudi LLMInferenceServiceConfig');
699762
});
700763
});
701764
});

packages/kserve/src/deploy.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ export const deployKServeDeployment = async (
4747
project: projectName,
4848
name: wizardData.k8sNameDesc.data.k8sName.value,
4949
servingRuntime: serverResource,
50-
scope: wizardData.modelServer.data?.scope || '',
50+
scope: wizardData.modelServer.data?.selection?.scope,
5151
templateName: serverResourceTemplateName,
5252
},
5353
dryRun,

packages/kserve/src/deployServer.ts

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@ import { applyK8sAPIOptions } from '@odh-dashboard/internal/api/apiMergeUtils';
22
import { ServingRuntimeModel } from '@odh-dashboard/internal/api/index';
33
import { getDisplayNameFromK8sResource } from '@odh-dashboard/internal/concepts/k8s/utils';
44
import { ServingRuntimeKind, type InferenceServiceKind } from '@odh-dashboard/internal/k8sTypes';
5-
import type { ModelServerOption } from '@odh-dashboard/model-serving/components/deploymentWizard/fields/ModelServerTemplateSelectField.js';
5+
import type { ModelServerSelectFieldData } from '@odh-dashboard/model-serving/components/deploymentWizard/fields/ModelServerTemplateSelectField.js';
66
import { k8sCreateResource } from '@openshift/dynamic-plugin-sdk-utils';
77
import type { KServeDeployment } from './deployments';
88

99
type CreatingServingRuntimeObject = {
1010
project: string;
1111
servingRuntime: ServingRuntimeKind;
1212
name: string;
13-
scope: string;
13+
scope?: string;
1414
templateName?: string;
1515
};
1616

@@ -24,7 +24,7 @@ const assembleServingRuntime = (data: CreatingServingRuntimeObject): ServingRunt
2424
'openshift.io/display-name': getDisplayNameFromK8sResource(servingRuntime),
2525
'opendatahub.io/template-name': templateName ?? servingRuntime.metadata.name,
2626
'opendatahub.io/template-display-name': getDisplayNameFromK8sResource(servingRuntime),
27-
'opendatahub.io/serving-runtime-scope': scope,
27+
...(scope && { 'opendatahub.io/serving-runtime-scope': scope }),
2828
};
2929

3030
updatedServingRuntime.metadata.annotations = annotations;
@@ -68,22 +68,26 @@ export const applyModelRuntime = (
6868
export const extractModelServerTemplate = (
6969
KServeDeployment: KServeDeployment,
7070
dashboardNamespace?: string,
71-
): ModelServerOption | null => {
71+
): ModelServerSelectFieldData | null => {
7272
const templateDisplayName =
7373
KServeDeployment.server?.metadata.annotations?.['opendatahub.io/template-display-name'];
7474
const displayName = KServeDeployment.server?.metadata.annotations?.['openshift.io/display-name'];
7575
const label = templateDisplayName ?? displayName;
7676
return KServeDeployment.server
7777
? {
78-
name: KServeDeployment.server.metadata.annotations?.['opendatahub.io/template-name'] ?? '',
79-
namespace:
80-
KServeDeployment.server.metadata.annotations?.['opendatahub.io/serving-runtime-scope'] ===
81-
'global'
82-
? dashboardNamespace
83-
: KServeDeployment.server.metadata.namespace,
84-
scope:
85-
KServeDeployment.server.metadata.annotations?.['opendatahub.io/serving-runtime-scope'],
86-
label,
78+
selection: {
79+
name:
80+
KServeDeployment.server.metadata.annotations?.['opendatahub.io/template-name'] ?? '',
81+
namespace:
82+
KServeDeployment.server.metadata.annotations?.[
83+
'opendatahub.io/serving-runtime-scope'
84+
] === 'global'
85+
? dashboardNamespace
86+
: KServeDeployment.server.metadata.namespace,
87+
scope:
88+
KServeDeployment.server.metadata.annotations?.['opendatahub.io/serving-runtime-scope'],
89+
label,
90+
},
8791
}
8892
: null;
8993
};

packages/llmd-serving/extensions/extensions.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ import type {
1313
// eslint-disable-next-line no-restricted-syntax
1414
import { SupportedArea } from '@odh-dashboard/internal/concepts/areas/types';
1515
import type { AreaExtension } from '@odh-dashboard/plugin-core/extension-points';
16-
import type { LLMdDeployment } from '../src/types';
16+
import type { FetchStateObject } from '@odh-dashboard/internal/utilities/useFetch';
17+
import type { LLMdDeployment, LLMInferenceServiceConfigKind } from '../src/types';
1718
import type {
1819
LLMConfigOptionsData,
1920
LLMConfigOptionsFieldValue,
@@ -24,7 +25,7 @@ export const LLMD_SERVING_ID = 'llmd-serving';
2425
const extensions: (
2526
| AreaExtension
2627
| ModelServingPlatformWatchDeploymentsExtension<LLMdDeployment>
27-
| DeployedModelServingDetails<LLMdDeployment>
28+
| DeployedModelServingDetails<LLMdDeployment, FetchStateObject<LLMInferenceServiceConfigKind[]>>
2829
| ModelServingDeploymentFormDataExtension<LLMdDeployment>
2930
| ModelServingDeleteModal<LLMdDeployment>
3031
| ModelServingDeploy<LLMdDeployment>
@@ -57,7 +58,12 @@ const extensions: (
5758
type: 'model-serving.deployed-model/serving-runtime',
5859
properties: {
5960
platform: LLMD_SERVING_ID,
60-
ServingDetailsComponent: () => import('../src/components/servingRuntime'),
61+
dataHook: () =>
62+
import('../src/components/ServingDetails').then((m) => m.useServingDetailsData),
63+
ServingDetailsComponent: () =>
64+
import('../src/components/ServingDetails').then((m) => ({
65+
default: m.default,
66+
})),
6167
},
6268
flags: {
6369
required: [LLMD_SERVING_ID],

packages/llmd-serving/src/api/LLMInferenceService.ts

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ import {
55
k8sUpdateResource,
66
} from '@openshift/dynamic-plugin-sdk-utils';
77
import { applyK8sAPIOptions } from '@odh-dashboard/internal/api/apiMergeUtils';
8-
import { createPatchesFromDiff } from '@odh-dashboard/internal/api/k8sUtils';
8+
import { createPatchesFromDiff, groupVersionKind } from '@odh-dashboard/internal/api/k8sUtils';
9+
import { CustomWatchK8sResult } from '@odh-dashboard/internal/types';
10+
import useK8sWatchResourceList from '@odh-dashboard/internal/utilities/useK8sWatchResourceList';
911
import { LLMInferenceServiceKind, LLMInferenceServiceModel } from '../types';
1012

1113
export const createLLMInferenceService = (
@@ -75,3 +77,20 @@ export const patchLLMInferenceService = (
7577
),
7678
);
7779
};
80+
81+
export const useWatchLLMInferenceService = (
82+
namespace: string,
83+
opts?: K8sAPIOptions,
84+
labelSelectors?: { [key: string]: string },
85+
): CustomWatchK8sResult<LLMInferenceServiceKind[]> => {
86+
return useK8sWatchResourceList<LLMInferenceServiceKind[]>(
87+
{
88+
isList: true,
89+
groupVersionKind: groupVersionKind(LLMInferenceServiceModel),
90+
namespace,
91+
...(labelSelectors && { selector: labelSelectors }),
92+
},
93+
LLMInferenceServiceModel,
94+
opts,
95+
);
96+
};

packages/llmd-serving/src/api/LLMInferenceServiceConfigs.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,15 @@
11
import React from 'react';
22
import useFetch, { FetchStateObject } from '@odh-dashboard/internal/utilities/useFetch';
33
import { k8sListResourceItems } from '@openshift/dynamic-plugin-sdk-utils';
4+
import useK8sWatchResourceList from '@odh-dashboard/internal/utilities/useK8sWatchResourceList';
5+
import { groupVersionKind } from '@odh-dashboard/internal/api/k8sUtils';
6+
import { K8sAPIOptions } from '@odh-dashboard/internal/k8sTypes';
7+
import { CustomWatchK8sResult } from '@odh-dashboard/internal/types';
48
import { LLMInferenceServiceConfigModel, type LLMInferenceServiceConfigKind } from '../types';
59

10+
/**
11+
* @returns Template versions of the LLMInferenceServiceConfigKind[] (filtered on 'opendatahub.io/config-type=accelerator')
12+
*/
613
export const listLLMInferenceServiceConfigs = async (
714
namespace: string,
815
): Promise<LLMInferenceServiceConfigKind[]> => {
@@ -17,6 +24,9 @@ export const listLLMInferenceServiceConfigs = async (
1724
});
1825
};
1926

27+
/**
28+
* @returns Template versions of the LLMInferenceServiceConfigKind[] (filtered on 'opendatahub.io/config-type=accelerator')
29+
*/
2030
export const useFetchLLMInferenceServiceConfigs = (
2131
namespace: string,
2232
): FetchStateObject<LLMInferenceServiceConfigKind[]> => {
@@ -26,3 +36,18 @@ export const useFetchLLMInferenceServiceConfigs = (
2636

2737
return useFetch(fetchCallbackPromise, []);
2838
};
39+
40+
export const useWatchLLMInferenceServiceConfigs = (
41+
namespace: string,
42+
opts?: K8sAPIOptions,
43+
): CustomWatchK8sResult<LLMInferenceServiceConfigKind[]> => {
44+
return useK8sWatchResourceList<LLMInferenceServiceConfigKind[]>(
45+
{
46+
isList: true,
47+
groupVersionKind: groupVersionKind(LLMInferenceServiceConfigModel),
48+
namespace,
49+
},
50+
LLMInferenceServiceConfigModel,
51+
opts,
52+
);
53+
};

0 commit comments

Comments
 (0)