Skip to content

Commit b316244

Browse files
danielseong1 (Daniel Seong) and claude
authored
[ML-59575] Scorers UI updates (mlflow#19001)
Signed-off-by: Daniel Seong <[email protected]>
Co-authored-by: Daniel Seong <[email protected]>
Co-authored-by: Claude <[email protected]>
1 parent ef5ebd5 commit b316244

37 files changed

+2103
-1369
lines changed

mlflow/server/js/src/experiment-tracking/components/experiment-page/components/header/tab-selector-bar/TabSelectorBarConstants.tsx

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,13 @@ import Routes from '@mlflow/mlflow/src/experiment-tracking/routes';
22

33
import {
44
GearIcon,
5+
GavelIcon,
56
ListBorderIcon,
67
ListIcon,
78
ModelsIcon,
89
PlusMinusSquareIcon,
910
UserIcon,
1011
TextBoxIcon,
11-
SparkleIcon,
1212
} from '@databricks/design-system';
1313
import { ExperimentPageTabName } from '@mlflow/mlflow/src/experiment-tracking/constants';
1414
import { FormattedMessage } from 'react-intl';
@@ -63,13 +63,10 @@ const ModelsTabConfig = {
6363
};
6464
const ScorersTabConfig = {
6565
label: (
66-
<FormattedMessage
67-
defaultMessage="Scorers"
68-
description="Label for the scorers tab in the MLflow experiment navbar"
69-
/>
66+
<FormattedMessage defaultMessage="Judges" description="Label for the judges tab in the MLflow experiment navbar" />
7067
),
71-
icon: <SparkleIcon />,
72-
getRoute: (experimentId: string) => Routes.getExperimentPageTabRoute(experimentId, ExperimentPageTabName.Scorers),
68+
icon: <GavelIcon />,
69+
getRoute: (experimentId: string) => Routes.getExperimentPageTabRoute(experimentId, ExperimentPageTabName.Judges),
7370
};
7471

7572
export type GenAIExperimentTabConfigMapProps = {
@@ -83,7 +80,7 @@ export const getGenAIExperimentTabConfigMap = ({
8380
[ExperimentPageTabName.Traces]: TracesTabConfig,
8481
[ExperimentPageTabName.EvaluationRuns]: EvaluationsTabConfig,
8582
[ExperimentPageTabName.Models]: ModelsTabConfig,
86-
...(enableScorersUI() && { [ExperimentPageTabName.Scorers]: ScorersTabConfig }),
83+
...(enableScorersUI() && { [ExperimentPageTabName.Judges]: ScorersTabConfig }),
8784
});
8885

8986
export const getGenAIExperimentWithPromptsTabConfigMap = ({
@@ -92,7 +89,7 @@ export const getGenAIExperimentWithPromptsTabConfigMap = ({
9289
...(includeRunsTab && { [ExperimentPageTabName.Runs]: RunsTabConfig }),
9390
[ExperimentPageTabName.Traces]: TracesTabConfig,
9491
[ExperimentPageTabName.Models]: ModelsTabConfig,
95-
...(enableScorersUI() && { [ExperimentPageTabName.Scorers]: ScorersTabConfig }),
92+
...(enableScorersUI() && { [ExperimentPageTabName.Judges]: ScorersTabConfig }),
9693
});
9794

9895
export const GenAIExperimentWithPromptsTabConfigMap = getGenAIExperimentTabConfigMap();
@@ -109,9 +106,9 @@ export const CustomExperimentTabConfigMap: TabConfigMap = {
109106
),
110107
},
111108
[ExperimentPageTabName.Traces]: TracesTabConfig,
109+
...(enableScorersUI() && { [ExperimentPageTabName.Judges]: ScorersTabConfig }),
112110
};
113111

114112
export const DefaultTabConfigMap: TabConfigMap = {
115113
...CustomExperimentTabConfigMap,
116-
...(enableScorersUI() && { [ExperimentPageTabName.Scorers]: ScorersTabConfig }),
117114
};

mlflow/server/js/src/experiment-tracking/components/experiment-page/hooks/useGetExperimentPageActiveTabByRoute.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ const ExperimentPageRoutePathToTabNameMap = map(
1414
[RoutePaths.experimentPageTabDatasets]: ExperimentPageTabName.Datasets,
1515
[RoutePaths.experimentPageTabChatSessions]: ExperimentPageTabName.ChatSessions,
1616
[RoutePaths.experimentPageTabSingleChatSession]: ExperimentPageTabName.SingleChatSession,
17-
[RoutePaths.experimentPageTabScorers]: ExperimentPageTabName.Scorers,
17+
[RoutePaths.experimentPageTabScorers]: ExperimentPageTabName.Judges,
1818
},
1919
(tabName, routePath) => ({ routePath, tabName }),
2020
);

mlflow/server/js/src/experiment-tracking/constants.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,7 @@ export enum ExperimentPageTabName {
143143
Traces = 'traces',
144144
Models = 'models',
145145
EvaluationMonitoring = 'evaluation-monitoring',
146-
Scorers = 'scorers',
146+
Judges = 'judges',
147147
EvaluationRuns = 'evaluation-runs',
148148
Datasets = 'datasets',
149149
LabelingSessions = 'labeling-sessions',

mlflow/server/js/src/experiment-tracking/pages/experiment-page-tabs/side-nav/constants.tsx

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ import {
1313
UserGroupIcon,
1414
} from '@databricks/design-system';
1515
import { FormattedMessage } from 'react-intl';
16+
import { enableScorersUI } from '@mlflow/mlflow/src/common/utils/FeatureUtils';
1617

1718
export const FULL_WIDTH_CLASS_NAME = 'mlflow-experiment-page-side-nav-full';
1819
export const COLLAPSED_CLASS_NAME = 'mlflow-experiment-page-side-nav-collapsed';
@@ -43,16 +44,6 @@ const ExperimentPageSideNavGenAIConfig = {
4344
},
4445
],
4546
evaluation: [
46-
{
47-
label: (
48-
<FormattedMessage
49-
defaultMessage="Scorers"
50-
description="Label for the scorers tab in the MLflow experiment navbar"
51-
/>
52-
),
53-
icon: <GavelIcon />,
54-
tabName: ExperimentPageTabName.Scorers,
55-
},
5647
{
5748
label: (
5849
<FormattedMessage
@@ -196,6 +187,21 @@ export const useExperimentPageSideNavConfig = ({
196187
tabName: ExperimentPageTabName.ChatSessions,
197188
},
198189
],
190+
evaluation: enableScorersUI()
191+
? [
192+
...ExperimentPageSideNavGenAIConfig.evaluation,
193+
{
194+
label: (
195+
<FormattedMessage
196+
defaultMessage="Judges"
197+
description="Label for the judges tab in the MLflow experiment navbar"
198+
/>
199+
),
200+
icon: <GavelIcon />,
201+
tabName: ExperimentPageTabName.Judges,
202+
},
203+
]
204+
: ExperimentPageSideNavGenAIConfig.evaluation,
199205
};
200206
}
201207

mlflow/server/js/src/experiment-tracking/pages/experiment-scorers/CustomCodeScorerFormRenderer.tsx

Lines changed: 14 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ const CustomCodeScorerFormRenderer: React.FC<CustomCodeScorerFormRendererProps>
7272
};
7373

7474
if (mode === SCORER_FORM_MODE.CREATE) {
75-
const step1Code = `pip install --upgrade "mlflow[databricks]>=3.1.0"`;
75+
const step1Code = `pip install --upgrade "mlflow>=3.1.0"`;
7676

7777
const step2Code = `from mlflow.genai.scorers import scorer, ScorerSamplingConfig
7878
from typing import Optional, Any
@@ -93,15 +93,14 @@ def my_custom_scorer(
9393
# TODO: Implement your custom scoring logic
9494
return 1.0`;
9595

96-
const step3Code = `custom_scorer = my_custom_scorer.register(name="my_custom_scorer")
97-
custom_scorer = custom_scorer.start(sampling_config=ScorerSamplingConfig(sample_rate=0.5))`;
96+
const step3Code = `custom_scorer = my_custom_scorer.register(name="my_custom_scorer")`;
9897

9998
return (
10099
<div css={{ display: 'flex', flexDirection: 'column' }}>
101100
<Typography.Text>
102101
<FormattedMessage
103-
defaultMessage="Follow these steps to create a custom scorer using your own code. {link}"
104-
description="Brief instructions for custom scorer functions"
102+
defaultMessage="Follow these steps to create a custom judge using your own code. {link}"
103+
description="Brief instructions for custom judge functions"
105104
values={{
106105
link: (
107106
<Typography.Link
@@ -120,12 +119,12 @@ custom_scorer = custom_scorer.start(sampling_config=ScorerSamplingConfig(sample_
120119
<Typography.Title level={4} css={{ marginBottom: theme.spacing.sm }}>
121120
<FormattedMessage
122121
defaultMessage="Step 1: Install MLflow"
123-
description="Step 1 title for custom scorer creation"
122+
description="Step 1 title for custom judge creation"
124123
/>
125124
</Typography.Title>
126125
<Typography.Text css={{ display: 'block', marginBottom: theme.spacing.md, maxWidth: 800 }}>
127126
<FormattedMessage
128-
defaultMessage="Install or upgrade MLflow with the Databricks extras to ensure you have the latest scorer functionality."
127+
defaultMessage="Install or upgrade MLflow to ensure you have the latest judge functionality."
129128
description="Step 1 description for installing MLflow"
130129
/>
131130
</Typography.Text>
@@ -140,14 +139,14 @@ custom_scorer = custom_scorer.start(sampling_config=ScorerSamplingConfig(sample_
140139
<div>
141140
<Typography.Title level={4} css={{ marginBottom: theme.spacing.sm }}>
142141
<FormattedMessage
143-
defaultMessage="Step 2: Define your scorer function"
144-
description="Step 2 title for custom scorer creation"
142+
defaultMessage="Step 2: Define your judge function"
143+
description="Step 2 title for custom judge creation"
145144
/>
146145
</Typography.Title>
147146
<Typography.Text css={{ display: 'block', marginBottom: theme.spacing.md, maxWidth: 800 }}>
148147
<FormattedMessage
149-
defaultMessage="Create a custom scorer function using the {decorator} decorator. Implement your scoring logic in the function body. {link}"
150-
description="Step 2 description for defining scorer function"
148+
defaultMessage="Create a custom judge function using the {decorator} decorator. Implement your scoring logic in the function body. {link}"
149+
description="Step 2 description for defining judge function"
151150
values={{
152151
decorator: <Typography.Text code>@scorer</Typography.Text>,
153152
link: (
@@ -173,14 +172,14 @@ custom_scorer = custom_scorer.start(sampling_config=ScorerSamplingConfig(sample_
173172
<div>
174173
<Typography.Title level={4} css={{ marginBottom: theme.spacing.sm }}>
175174
<FormattedMessage
176-
defaultMessage="Step 3: Register and start the scorer"
177-
description="Step 3 title for custom scorer creation"
175+
defaultMessage="Step 3: Register the judge"
176+
description="Step 3 title for custom judge creation"
178177
/>
179178
</Typography.Title>
180179
<Typography.Text css={{ display: 'block', marginBottom: theme.spacing.md, maxWidth: 800 }}>
181180
<FormattedMessage
182-
defaultMessage="Register your scorer and start it with a sampling configuration. The scorer will then be available for use and will show up in this UI."
183-
description="Step 3 description for registering and starting scorer"
181+
defaultMessage="Register your judge. The judge will then show up in this UI."
182+
description="Step 3 description for registering and starting judge"
184183
/>
185184
</Typography.Text>
186185
<CodeBlockWithCopy

mlflow/server/js/src/experiment-tracking/pages/experiment-scorers/DeleteScorerModalRenderer.tsx

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ export const DeleteScorerModalRenderer: React.FC<DeleteScorerModalRendererProps>
2727
<DangerModal
2828
componentId={`${COMPONENT_ID_PREFIX}.delete-modal`}
2929
title={
30-
<FormattedMessage defaultMessage="Delete scorer" description="Title for the delete scorer confirmation modal" />
30+
<FormattedMessage defaultMessage="Delete judge" description="Title for the delete judge confirmation modal" />
3131
}
3232
visible={isOpen}
3333
onCancel={onClose}
@@ -36,8 +36,8 @@ export const DeleteScorerModalRenderer: React.FC<DeleteScorerModalRendererProps>
3636
>
3737
<>
3838
<FormattedMessage
39-
defaultMessage="Are you sure you want to delete the scorer ''{scorerName}''? This action cannot be undone."
40-
description="Confirmation message for deleting a scorer"
39+
defaultMessage="Are you sure you want to delete the judge ''{scorerName}''? This action cannot be undone."
40+
description="Confirmation message for deleting a judge"
4141
values={{ scorerName: scorer.name }}
4242
/>
4343
{error && (

mlflow/server/js/src/experiment-tracking/pages/experiment-scorers/EvaluateTracesSectionRenderer.tsx

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,10 @@ const EvaluateTracesSectionRenderer: React.FC<EvaluateTracesSectionRendererProps
2626
control,
2727
name: 'sampleRate',
2828
});
29+
const disableMonitoring = useWatch({
30+
control,
31+
name: 'disableMonitoring',
32+
});
2933

3034
const isAutomaticEvaluationEnabled = sampleRate > 0;
3135

@@ -38,6 +42,10 @@ const EvaluateTracesSectionRenderer: React.FC<EvaluateTracesSectionRendererProps
3842
flexDirection: 'column' as const,
3943
};
4044

45+
if (disableMonitoring) {
46+
return null;
47+
}
48+
4149
return (
4250
<>
4351
{/* Evaluation settings section header */}
@@ -68,7 +76,7 @@ const EvaluateTracesSectionRenderer: React.FC<EvaluateTracesSectionRendererProps
6876
onClick={stopPropagationClick}
6977
>
7078
<FormattedMessage
71-
defaultMessage="Automatically evaluate future traces using this scorer"
79+
defaultMessage="Automatically evaluate future traces using this judge"
7280
description="Checkbox label for enabling automatic evaluation"
7381
/>
7482
</Checkbox>
@@ -89,7 +97,7 @@ const EvaluateTracesSectionRenderer: React.FC<EvaluateTracesSectionRendererProps
8997
</FormUI.Label>
9098
<FormUI.Hint>
9199
<FormattedMessage
92-
defaultMessage="Percentage of traces evaluated by this scorer."
100+
defaultMessage="Percentage of traces evaluated by this judge."
93101
description="Hint text for sample rate slider"
94102
/>
95103
</FormUI.Hint>

mlflow/server/js/src/experiment-tracking/pages/experiment-scorers/ExperimentScorersContentContainer.tsx

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,8 @@ const ExperimentScorersContentContainer: React.FC<ExperimentScorersContentContai
5050
{[...Array(3).keys()].map((i) => (
5151
<ParagraphSkeleton
5252
label={intl.formatMessage({
53-
defaultMessage: 'Loading scorers...',
54-
description: 'Loading message while fetching experiment scorers',
53+
defaultMessage: 'Loading judges...',
54+
description: 'Loading message while fetching experiment judges',
5555
})}
5656
key={i}
5757
seed={`scorer-${i}`}
@@ -90,7 +90,7 @@ const ExperimentScorersContentContainer: React.FC<ExperimentScorersContentContai
9090
componentId={`${COMPONENT_ID_PREFIX}.new-scorer-button`}
9191
onClick={handleNewScorerClick}
9292
>
93-
<FormattedMessage defaultMessage="New scorer" description="Button text to create a new scorer" />
93+
<FormattedMessage defaultMessage="New judge" description="Button text to create a new judge" />
9494
</Button>
9595
</div>
9696
<Spacer size="sm" />

mlflow/server/js/src/experiment-tracking/pages/experiment-scorers/ExperimentScorersPage.tsx

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -32,17 +32,17 @@ const ErrorFallback = ({ error }: { error?: Error }) => {
3232
<Empty
3333
title={
3434
<FormattedMessage
35-
defaultMessage="Unable to load experiment scorers"
36-
description="Error message when experiment scorers page fails to load"
35+
defaultMessage="Unable to load experiment judges"
36+
description="Error message when experiment judges page fails to load"
3737
/>
3838
}
3939
description={
4040
error ? (
4141
<span>{error.message}</span>
4242
) : (
4343
<FormattedMessage
44-
defaultMessage="We encountered an issue loading the scorers interface. Please refresh the page or contact support if the problem persists."
45-
description="Error description for experiment scorers page loading failure"
44+
defaultMessage="We encountered an issue loading the judges interface. Please refresh the page or contact support if the problem persists."
45+
description="Error description for experiment judges page loading failure"
4646
/>
4747
)
4848
}
@@ -87,16 +87,16 @@ const ExperimentScorersPage: React.FC<ExperimentScorersPageProps> = () => {
8787
image={<SparkleIcon css={{ fontSize: 48, color: theme.colors.textSecondary }} />}
8888
title={
8989
<FormattedMessage
90-
defaultMessage="Create and manage scorers"
91-
description="Title for the empty state of the scorers page"
90+
defaultMessage="Create and manage judges"
91+
description="Title for the empty state of the judges page"
9292
/>
9393
}
9494
description={
9595
<div css={{ maxWidth: 600, textAlign: 'center' }}>
9696
<Spacer size="sm" />
9797
<FormattedMessage
98-
defaultMessage="Configure predefined scorers, create guidelines-based LLM scorers, or build custom scorer functions to track your unique metrics. {link}"
99-
description="Description for the empty state of the scorers page"
98+
defaultMessage="Configure predefined judges, create guidelines-based LLM judges, or build custom judge functions to track your unique metrics. {link}"
99+
description="Description for the empty state of the judges page"
100100
values={{
101101
link: (
102102
<Typography.Link
@@ -106,8 +106,8 @@ const ExperimentScorersPage: React.FC<ExperimentScorersPageProps> = () => {
106106
rel="noreferrer"
107107
>
108108
<FormattedMessage
109-
defaultMessage="Learn more about configuring scorers"
110-
description="Link text for configuring scorers documentation"
109+
defaultMessage="Learn more about configuring judges"
110+
description="Link text for configuring judges documentation"
111111
/>
112112
</Typography.Link>
113113
),

0 commit comments

Comments
 (0)