Merge branch 'main' into fix/autorag-max-pred-length

nickmazzi · web-flow · commit 35ad71c1b77b · 2026-04-09T14:37:14.000-04:00
diff --git a/packages/cypress/cypress/tests/mocked/modelServing/modelServingLlmd.cy.ts b/packages/cypress/cypress/tests/mocked/modelServing/modelServingLlmd.cy.ts
@@ -1107,6 +1107,7 @@ describe('Model Serving LLMD', () => {
         expect(interception.request.url).to.include('?dryRun=All');
         expect(interception.request.body.spec.baseRefs).to.have.length(1);
         expect(interception.request.body.spec.baseRefs).to.deep.include({ name: deploymentName });
+        expect(interception.request.body.spec.router).to.not.have.property('scheduler');
       });
 
       // Actual: config created with same resource name as deployment, cloned from the selected template
@@ -1123,6 +1124,7 @@ describe('Model Serving LLMD', () => {
         expect(interception.request.url).not.to.include('?dryRun=All');
         expect(interception.request.body.spec.baseRefs).to.have.length(1);
         expect(interception.request.body.spec.baseRefs).to.deep.include({ name: deploymentName });
+        expect(interception.request.body.spec.router).to.not.have.property('scheduler');
       });
     });
 
@@ -1213,6 +1215,7 @@ describe('Model Serving LLMD', () => {
         expect(interception.request.url).to.include('?dryRun=All');
         expect(interception.request.body.spec.baseRefs).to.have.length(1);
         expect(interception.request.body.spec.baseRefs).to.deep.include({ name: 'test-vllm-gpu' });
+        expect(interception.request.body.spec.router).to.not.have.property('scheduler');
       });
 
       // Actual: config updated (preserved), IS updated with exactly one baseRef preserved
@@ -1224,6 +1227,7 @@ describe('Model Serving LLMD', () => {
         expect(interception.request.url).not.to.include('?dryRun=All');
         expect(interception.request.body.spec.baseRefs).to.have.length(1);
         expect(interception.request.body.spec.baseRefs).to.deep.include({ name: 'test-vllm-gpu' });
+        expect(interception.request.body.spec.router).to.not.have.property('scheduler');
       });
     });
 
diff --git a/packages/gen-ai/frontend/src/app/Chatbot/ChatbotMain.tsx b/packages/gen-ai/frontend/src/app/Chatbot/ChatbotMain.tsx
@@ -39,8 +39,10 @@ const ChatbotMain: React.FunctionComponent = () => {
     aiModelsError,
     maasModels,
     maasModelsLoaded,
+    maasModelsError,
     models,
     modelsLoaded,
+    modelsError,
   } = React.useContext(ChatbotContext);
   const { namespace } = React.useContext(GenAiContext);
   const { data: bffConfig } = useFetchBFFConfig();
@@ -105,9 +107,9 @@ const ChatbotMain: React.FunctionComponent = () => {
         title={<ChatbotHeader />}
         loaded={
           lsdStatusLoaded &&
-          aiModelsLoaded &&
-          maasModelsLoaded &&
-          (lsdStatus?.phase !== 'Ready' || modelsLoaded)
+          (aiModelsLoaded || !!aiModelsError) &&
+          (maasModelsLoaded || !!maasModelsError) &&
+          (lsdStatus?.phase !== 'Ready' || !!modelsLoaded || !!modelsError)
         }
         empty={!lsdStatus}
         emptyStatePage={
diff --git a/packages/gen-ai/frontend/src/app/Chatbot/components/PromptAssistantFormGroup.tsx b/packages/gen-ai/frontend/src/app/Chatbot/components/PromptAssistantFormGroup.tsx
@@ -1,5 +1,4 @@
 import * as React from 'react';
-import { get } from 'lodash';
 import {
   Button,
   Flex,
@@ -105,14 +104,9 @@ export default function PromptAssistantFormGroup({
 
   function buildPromptStub(): MLflowPromptVersion {
     const now = new Date();
-    const pad = (n: number) => String(n).padStart(2, '0');
-    const month = now.toLocaleString('en', { month: 'short' });
-    const date = [month, pad(now.getDate()), now.getFullYear()].join('.');
-    const time = [pad(now.getHours()), pad(now.getMinutes())].join('.');
-    const name = `${date}_${time}`;
     /* eslint-disable camelcase */
     return {
-      name,
+      name: '',
       version: 0,
       template: '',
       commit_message: '',
@@ -143,7 +137,7 @@ export default function PromptAssistantFormGroup({
           }}
         >
           <Flex>
-            <Title headingLevel="h6">{get(dirtyPrompt, 'name', 'New Prompt')}</Title>
+            <Title headingLevel="h6">{dirtyPrompt?.name || 'New Prompt'}</Title>
             {!!activePrompt?.version && (
               <Label
                 isCompact
diff --git a/packages/gen-ai/frontend/src/app/Chatbot/components/promptManagementModal/__tests__/promptDrawer.spec.tsx b/packages/gen-ai/frontend/src/app/Chatbot/components/promptManagementModal/__tests__/promptDrawer.spec.tsx
@@ -66,9 +66,7 @@ describe('PromptDrawer', () => {
   it('should display prompt template in text area', () => {
     render(<PromptDrawer {...defaultProps} selectedVersion={2} />);
 
-    expect(screen.getByLabelText('prompt template')).toHaveValue(
-      JSON.stringify('You are a helpful assistant.', null, 2),
-    );
+    expect(screen.getByLabelText('prompt template')).toHaveValue('You are a helpful assistant.');
   });
 
   it('should display commit message', () => {
diff --git a/packages/gen-ai/frontend/src/app/Chatbot/components/promptManagementModal/__tests__/usePromptQueries.spec.ts b/packages/gen-ai/frontend/src/app/Chatbot/components/promptManagementModal/__tests__/usePromptQueries.spec.ts
@@ -33,6 +33,13 @@ jest.mock('~/app/hooks/useGenAiAPI', () => ({
   })),
 }));
 
+jest.mock('~/app/context/GenAiContext', () => {
+  const React = jest.requireActual('react');
+  return {
+    GenAiContext: React.createContext({ namespace: { name: 'test-namespace' } }),
+  };
+});
+
 const mockUseQuery = jest.mocked(useQuery);
 const mockUseInfiniteQuery = jest.mocked(useInfiniteQuery);
 
@@ -165,7 +172,7 @@ describe('usePromptsList', () => {
 
     expect(mockUseInfiniteQuery).toHaveBeenCalledWith(
       expect.objectContaining({
-        queryKey: ['prompts', 'list', { maxResults: 10, filterName: 'test' }],
+        queryKey: ['test-namespace_prompts', 'list', { maxResults: 10, filterName: 'test' }],
       }),
     );
 
@@ -264,7 +271,7 @@ describe('usePromptVersions', () => {
     expect(result.current.error).toEqual(mockError);
   });
 
-  it('should include prompt name in query key', () => {
+  it('should include namespace and prompt name in query key', () => {
     mockUseQuery.mockReturnValue({
       data: [],
       isLoading: false,
@@ -275,7 +282,7 @@ describe('usePromptVersions', () => {
 
     expect(mockUseQuery).toHaveBeenCalledWith(
       expect.objectContaining({
-        queryKey: ['prompts', 'my-prompt', 'versions'],
+        queryKey: ['test-namespace_prompts', 'my-prompt', 'versions'],
       }),
     );
   });
diff --git a/packages/gen-ai/frontend/src/app/Chatbot/components/promptManagementModal/promptDrawer.tsx b/packages/gen-ai/frontend/src/app/Chatbot/components/promptManagementModal/promptDrawer.tsx
@@ -108,19 +108,19 @@ export default function PromptDrawer({
               style={{ minHeight: '200px' }}
               resizeOrientation="vertical"
               aria-label="prompt template"
-              value={JSON.stringify(
-                template || messages?.find((m) => m.role === 'system')?.content,
-                null,
-                2,
-              )}
+              value={template || messages?.find((m) => m.role === 'system')?.content}
               readOnlyVariant="default"
             />
           </div>
           <DescriptionList isHorizontal horizontalTermWidthModifier={{ default: '20ch' }}>
             <DescriptionListGroup>
               <DescriptionListTerm>Last Modified:</DescriptionListTerm>
               <DescriptionListDescription>
-                <Timestamp date={new Date(updatedAt)} dateFormat={TimestampFormat.full} />
+                <Timestamp
+                  date={new Date(updatedAt)}
+                  dateFormat={TimestampFormat.full}
+                  style={{ fontSize: '14px' }}
+                />
               </DescriptionListDescription>
             </DescriptionListGroup>
             <DescriptionListGroup>
diff --git a/packages/gen-ai/frontend/src/app/Chatbot/components/promptManagementModal/usePromptQueries.ts b/packages/gen-ai/frontend/src/app/Chatbot/components/promptManagementModal/usePromptQueries.ts
@@ -1,5 +1,7 @@
 import { useInfiniteQuery, useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
+import { useContext } from 'react';
 import { useGenAiAPI } from '~/app/hooks/useGenAiAPI';
+import { GenAiContext } from '~/app/context/GenAiContext';
 import {
   MLflowPrompt,
   MLflowPromptsResponse,
@@ -24,6 +26,7 @@ type UsePromptsListResult = {
 export function usePromptsList(options: UsePromptsListOptions = {}): UsePromptsListResult {
   const { api, apiAvailable } = useGenAiAPI();
   const { maxResults, filterName } = options;
+  const { namespace } = useContext(GenAiContext);
 
   const { data, isLoading, isFetchingNextPage, hasNextPage, fetchNextPage, error } =
     useInfiniteQuery<
@@ -33,7 +36,7 @@ export function usePromptsList(options: UsePromptsListOptions = {}): UsePromptsL
       [string, string, { maxResults?: number; filterName?: string }],
       string | undefined
     >({
-      queryKey: ['prompts', 'list', { maxResults, filterName }],
+      queryKey: [`${namespace?.name}_prompts`, 'list', { maxResults, filterName }],
       queryFn: async ({ pageParam }) => {
         const queryParams: Record<string, unknown> = {};
         if (maxResults !== undefined) {
@@ -75,9 +78,10 @@ type UsePromptVersionsResult = {
 
 export function usePromptVersions(promptName: string | null): UsePromptVersionsResult {
   const { api, apiAvailable } = useGenAiAPI();
+  const { namespace } = useContext(GenAiContext);
 
   const { data, isLoading, error } = useQuery({
-    queryKey: ['prompts', promptName, 'versions'],
+    queryKey: [`${namespace?.name}_prompts`, promptName, 'versions'],
     queryFn: async () => {
       if (!promptName) {
         return [];
@@ -109,9 +113,10 @@ type UseLatestPromptVersionResult = {
 
 export function useLatestPromptVersion(promptName: string | null): UseLatestPromptVersionResult {
   const { api, apiAvailable } = useGenAiAPI();
+  const { namespace } = useContext(GenAiContext);
 
   const { data, isLoading, error } = useQuery({
-    queryKey: ['prompts', promptName, 'latest'],
+    queryKey: [`${namespace?.name}_prompts`, promptName, 'latest'],
     queryFn: () => api.getMLflowPrompt({ name: promptName! }),
     enabled: !!promptName && apiAvailable,
     staleTime: 0,
@@ -138,6 +143,7 @@ type UseCreatePromptResult = {
 export function useCreatePrompt(options: UseCreatePromptOptions = {}): UseCreatePromptResult {
   const { api, apiAvailable } = useGenAiAPI();
   const queryClient = useQueryClient();
+  const { namespace } = useContext(GenAiContext);
   const { onSuccess, onError } = options;
 
   const { mutate, isPending, error } = useMutation<
@@ -152,8 +158,10 @@ export function useCreatePrompt(options: UseCreatePromptOptions = {}): UseCreate
       return api.registerMLflowPrompt(request);
     },
     onSuccess: (data) => {
-      queryClient.invalidateQueries({ queryKey: ['prompts', 'list'] });
-      queryClient.invalidateQueries({ queryKey: ['prompts', data.name, 'versions'] });
+      queryClient.invalidateQueries({ queryKey: [`${namespace?.name}_prompts`, 'list'] });
+      queryClient.invalidateQueries({
+        queryKey: [`${namespace?.name}_prompts`, data.name, 'versions'],
+      });
       onSuccess?.(data);
     },
     onError,
diff --git a/packages/llmd-serving/src/deployments/__tests__/server.spec.ts b/packages/llmd-serving/src/deployments/__tests__/server.spec.ts
@@ -35,4 +35,33 @@ describe('applyConfigBaseRef', () => {
     const result = applyConfigBaseRef(svc, undefined);
     expect(result.spec.baseRefs).toEqual([{ name: 'other-config' }]);
   });
+
+  it('should keep scheduler when baseRef is undefined (llm-d selection)', () => {
+    const svc = mockLLMInferenceServiceK8sResource({ name: 'my-deployment' });
+    const result = applyConfigBaseRef(svc, undefined);
+    expect(result.spec.router?.scheduler).toEqual({});
+  });
+
+  it('should keep scheduler when baseRef does not match the resource name', () => {
+    const svc = mockLLMInferenceServiceK8sResource({});
+    const result = applyConfigBaseRef(svc, 'unrelated-config');
+    expect(result.spec.router?.scheduler).toEqual({});
+  });
+
+  it('should remove scheduler when baseRef matches the resource name (non-llm-d selection)', () => {
+    const svc = mockLLMInferenceServiceK8sResource({ name: 'my-deployment' });
+    const result = applyConfigBaseRef(svc, 'my-deployment');
+    expect(result.spec.router?.scheduler).toBeUndefined();
+    expect(result.spec.router?.route).toBeDefined();
+    expect(result.spec.router?.gateway).toBeDefined();
+  });
+
+  it('should remove scheduler when resource already has a self-referencing baseRef', () => {
+    const svc = mockLLMInferenceServiceK8sResource({
+      name: 'my-deployment',
+      baseRefs: [{ name: 'my-deployment' }],
+    });
+    const result = applyConfigBaseRef(svc, 'my-deployment');
+    expect(result.spec.router?.scheduler).toBeUndefined();
+  });
 });
diff --git a/packages/llmd-serving/src/deployments/server.ts b/packages/llmd-serving/src/deployments/server.ts
@@ -47,10 +47,22 @@ export const applyConfigBaseRef = (
     );
   }
 
+  // Add or remove baseRef
   if (baseRef && !result.spec.baseRefs?.some((ref) => ref.name === baseRef)) {
     result.spec.baseRefs = [...(result.spec.baseRefs ?? []), { name: baseRef }];
   } else if (!baseRef && result.spec.baseRefs?.find((ref) => ref.name === k8sName)) {
     result.spec.baseRefs = result.spec.baseRefs.filter((ref) => ref.name !== k8sName);
   }
+
+  // Remove scheduler when baseRef is set and baseRefs has a self-reference; otherwise set it to {}
+  if (baseRef && result.spec.baseRefs?.find((ref) => ref.name === k8sName)) {
+    delete result.spec.router?.scheduler;
+  } else {
+    result.spec.router = {
+      ...result.spec.router,
+      scheduler: {},
+    };
+  }
+
   return result;
 };