Skip to content

Commit 7516cfc

Browse files
support more models for llm-as-judge functionality, expose what models/providers are available based on API keys in ENV vars
1 parent 0caa140 commit 7516cfc

5 files changed

Lines changed: 74 additions & 11 deletions

File tree

src/agentevals/api/routes.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,18 @@ async def health_check():
2828
return {"status": "ok", "version": __version__}
2929

3030

31+
@router.get("/config")
async def get_config():
    """Return environment configuration, including which API keys are available.

    The response has the shape::

        {"apiKeys": {"google": bool, "anthropic": bool, "openai": bool}}

    Only the presence of each key is reported; the key values themselves are
    never included in the response. Google is reported as available when
    either ``GOOGLE_API_KEY`` or ``GEMINI_API_KEY`` is set.

    Returns:
        dict: Provider name mapped to whether a usable API key is present in
        the process environment.
    """
    # Local import keeps the handler self-contained; harmless if the module
    # already imports ``os`` at the top.
    import os

    def _has_key(*names: str) -> bool:
        # A blank or whitespace-only value is as unusable as a missing one,
        # so it must not be reported as an available key.
        return any(os.environ.get(name, "").strip() for name in names)

    return {
        "apiKeys": {
            "google": _has_key("GOOGLE_API_KEY", "GEMINI_API_KEY"),
            "anthropic": _has_key("ANTHROPIC_API_KEY"),
            "openai": _has_key("OPENAI_API_KEY"),
        }
    }
41+
42+
3143
@router.get("/metrics")
3244
async def list_metrics():
3345
"""List available metrics with metadata.

ui/src/api/client.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,14 @@ export async function validateEvalSet(evalSetFile: File) {
215215
}
216216
}
217217

218+
/**
 * Fetch the backend environment configuration.
 *
 * Issues `GET ${API_BASE_URL}/config` and returns, per provider, whether the
 * backend reports an API key as available.
 *
 * @returns An object whose `apiKeys` field holds one boolean per provider.
 *   A provider flag that is missing or not literally `true` in the payload is
 *   returned as `false`.
 * @throws Error if the HTTP response is not OK, or if the payload has no
 *   `apiKeys` object.
 */
export async function getConfig(): Promise<{ apiKeys: { google: boolean; anthropic: boolean; openai: boolean } }> {
  const response = await fetch(`${API_BASE_URL}/config`);
  if (!response.ok) {
    // statusText is empty over HTTP/2, so always include the numeric status.
    throw new Error(`Failed to fetch config: ${response.status} ${response.statusText}`.trimEnd());
  }

  // response.json() is typed `any`; narrow it explicitly instead of trusting it.
  const payload: unknown = await response.json();
  const rawKeys =
    typeof payload === 'object' && payload !== null
      ? (payload as { apiKeys?: unknown }).apiKeys
      : undefined;
  if (typeof rawKeys !== 'object' || rawKeys === null) {
    throw new Error('Failed to fetch config: malformed response (missing "apiKeys")');
  }

  const { google, anthropic, openai } = rawKeys as Record<string, unknown>;
  return {
    apiKeys: {
      google: google === true,
      anthropic: anthropic === true,
      openai: openai === true,
    },
  };
}
225+
218226
/**
219227
* Health check
220228
*/

ui/src/components/upload/UploadView.tsx

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -173,11 +173,13 @@ const uploadViewStyle = css`
173173
`;
174174

175175
const JUDGE_MODELS = [
176-
'gemini-2.5-flash',
177-
'gemini-2.0-flash',
178-
'claude-3.5-sonnet',
179-
'gpt-4o',
180-
];
176+
{ value: 'gemini-2.5-flash', provider: 'google' },
177+
{ value: 'gemini-2.0-flash', provider: 'google' },
178+
{ value: 'anthropic/claude-3.5-sonnet', provider: 'anthropic' },
179+
{ value: 'openai/gpt-4o', provider: 'openai' },
180+
] as const;
181+
182+
type JudgeModelProvider = typeof JUDGE_MODELS[number]['provider'];
181183

182184
export const UploadView: React.FC = () => {
183185
const { state, actions } = useTraceContext();
@@ -430,13 +432,40 @@ export const UploadView: React.FC = () => {
430432
<Select
431433
value={state.judgeModel}
432434
onChange={actions.setJudgeModel}
433-
options={JUDGE_MODELS.map((model) => ({ label: model, value: model }))}
435+
options={JUDGE_MODELS.map(({ value }) => ({ label: value, value }))}
434436
style={{ width: '100%' }}
435437
size="small"
436438
/>
437-
<span className="setting-hint">
438-
LLM for judge-based metrics
439-
</span>
439+
{state.apiKeyStatus && (
440+
<div style={{ display: 'flex', gap: 10, marginTop: 4, flexWrap: 'wrap' }}>
441+
{([
442+
{ label: 'GOOGLE_API_KEY', provider: 'google' as const },
443+
{ label: 'ANTHROPIC_API_KEY', provider: 'anthropic' as const },
444+
{ label: 'OPENAI_API_KEY', provider: 'openai' as const },
445+
] as const).map(({ label, provider }) => {
446+
const ok = state.apiKeyStatus![provider];
447+
return (
448+
<span key={provider} style={{ display: 'flex', alignItems: 'center', gap: 4, fontSize: 10, color: ok ? 'var(--status-success)' : 'var(--status-failure)' }}>
449+
<span style={{ width: 6, height: 6, borderRadius: '50%', backgroundColor: ok ? 'var(--status-success)' : 'var(--status-failure)', flexShrink: 0 }} />
450+
{label}
451+
</span>
452+
);
453+
})}
454+
</div>
455+
)}
456+
{(() => {
457+
const selected = JUDGE_MODELS.find((m) => m.value === state.judgeModel);
458+
const provider = selected?.provider as JudgeModelProvider | undefined;
459+
if (!provider || !state.apiKeyStatus || state.apiKeyStatus[provider]) return null;
460+
const keyName = provider === 'google' ? 'GOOGLE_API_KEY / GEMINI_API_KEY'
461+
: provider === 'anthropic' ? 'ANTHROPIC_API_KEY'
462+
: 'OPENAI_API_KEY';
463+
return (
464+
<span style={{ fontSize: '11px', color: 'var(--status-failure)', marginTop: 2 }}>
465+
{keyName} is not set — this model will fail
466+
</span>
467+
);
468+
})()}
440469
</div>
441470

442471
<div className="setting-item" style={{ marginTop: 10 }}>

ui/src/context/TraceContext.tsx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@ import { createContext, useContext } from 'react';
22
import type { TraceResult, ViewType, EvalSet, EvalSetMetadata, EvalCase, TraceTableRow, LiveSession, AnnotationQueue, Annotation } from '../lib/types';
33
import type { TraceMetadata } from '../lib/trace-metadata';
44

5+
/**
 * Per-provider availability of judge-model API keys.
 *
 * Each flag is `true` when an API key for that provider is reported as
 * available, and `false` otherwise. Only availability is represented here;
 * the key values themselves are not part of this type.
 */
export interface ApiKeyStatus {
6+
google: boolean;
7+
anthropic: boolean;
8+
openai: boolean;
9+
}
10+
511
export interface TraceState {
612
// Upload state
713
traceFiles: File[];
@@ -11,6 +17,7 @@ export interface TraceState {
1117
threshold: number;
1218
traceMetadata: Map<string, TraceMetadata>;
1319
isLoadingMetadata: boolean;
20+
apiKeyStatus: ApiKeyStatus | null;
1421

1522
// Evaluation state
1623
isEvaluating: boolean;

ui/src/context/TraceProvider.tsx

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
import React, { useState, useMemo } from 'react';
1+
import React, { useState, useMemo, useEffect } from 'react';
22
import type { ReactNode } from 'react';
33
import { TraceContext } from './TraceContext';
44
import type { TraceState } from './TraceContext';
55
import type { ViewType, EvalSet, EvalSetMetadata, EvalCase, LiveSession, AnnotationQueue, Annotation } from '../lib/types';
6-
import { evaluateTracesStreaming } from '../api/client';
6+
import { evaluateTracesStreaming, getConfig } from '../api/client';
77
import { extractMetadataFromTraceFile } from '../lib/trace-metadata';
88

99
interface TraceProviderProps {
@@ -19,6 +19,7 @@ export const TraceProvider: React.FC<TraceProviderProps> = ({ children }) => {
1919
threshold: 0.8,
2020
traceMetadata: new Map(),
2121
isLoadingMetadata: false,
22+
apiKeyStatus: null,
2223
isEvaluating: false,
2324
progressMessage: '',
2425
results: [],
@@ -37,6 +38,12 @@ export const TraceProvider: React.FC<TraceProviderProps> = ({ children }) => {
3738
builderSelectedTraceIds: [],
3839
});
3940

41+
useEffect(() => {
42+
getConfig()
43+
.then((cfg) => setState((prev) => ({ ...prev, apiKeyStatus: cfg.apiKeys })))
44+
.catch(() => {}); // silently ignore if backend is unreachable
45+
}, []);
46+
4047
const actions = useMemo(
4148
() => ({
4249
setTraceFiles: async (files: File[]) => {

0 commit comments

Comments
 (0)