Skip to content

Commit 18156b1

Browse files
committed
fix(kbn-evals-suite-security-alert-triage): update for kbn-evals API changes
@kbn/evals updated its API: `runExperiment` now takes `datasets: T[]` (plural array) instead of `dataset: T` (singular), and `Example.input` is now optional.

Two spec files needed updating:

- Switch from `dataset: {...}` to `datasets: [{...} satisfies EvaluationDataset]`.
- Move the task definition inline so TypeScript infers the parameter type from TEvaluationDataset rather than falling back to the Example defaults, which triggered a contravariance error on the typed ExperimentTask.
- Add `extends Example` to TriageEvalExample and AlertEvalExample so they satisfy EvaluationDataset's TExample constraint.
- Drop the ExperimentTask/TaskOutput imports (no longer needed in specs).
- Add the EvaluationDataset and Example imports.

Refs #17496
1 parent 8589d10 commit 18156b1

2 files changed

Lines changed: 35 additions & 34 deletions

File tree

x-pack/solutions/security/packages/kbn-evals-suite-security-alert-triage/evals/alert_triage_quality.spec.ts

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ import type { EsClient } from '@kbn/scout';
3030
import {
3131
selectEvaluators,
3232
type DefaultEvaluators,
33+
type EvaluationDataset,
3334
type EvalsExecutorClient,
34-
type ExperimentTask,
35-
type TaskOutput,
35+
type Example,
3636
} from '@kbn/evals';
3737
import type { ToolingLog } from '@kbn/tooling-log';
3838
import type { HttpHandler } from '@kbn/core/public';
@@ -64,7 +64,7 @@ const toAlertAttachments = (ids: string[]) => {
6464

6565
// ── Types ─────────────────────────────────────────────────────────────────────
6666

67-
interface TriageEvalExample {
67+
interface TriageEvalExample extends Example {
6868
input: { question: string };
6969
output: { expected: string };
7070
metadata?: {
@@ -95,17 +95,6 @@ function createEvaluateTriageQuality({
9595
dataset: { name: string; description: string; examples: TriageEvalExample[] };
9696
criteria: string[];
9797
}) {
98-
const task: ExperimentTask<TriageEvalExample, TaskOutput> = async ({ input, metadata }) => {
99-
const { attachments = [] } = metadata ?? {};
100-
return callConverse({
101-
fetch,
102-
connectorId: connector.id,
103-
question: input.question,
104-
attachments,
105-
log,
106-
});
107-
};
108-
10998
const selectedEvaluators = selectEvaluators([
11099
evaluators.criteria(criteria),
111100
attachmentReadCompliance,
@@ -114,12 +103,23 @@ function createEvaluateTriageQuality({
114103

115104
await executorClient.runExperiment(
116105
{
117-
dataset: {
118-
name: dataset.name,
119-
description: dataset.description,
120-
examples: dataset.examples,
106+
datasets: [
107+
{
108+
name: dataset.name,
109+
description: dataset.description,
110+
examples: dataset.examples,
111+
} satisfies EvaluationDataset,
112+
],
113+
task: async ({ input, metadata }) => {
114+
const { attachments = [] } = metadata ?? {};
115+
return callConverse({
116+
fetch,
117+
connectorId: connector.id,
118+
question: input.question,
119+
attachments,
120+
log,
121+
});
121122
},
122-
task,
123123
},
124124
selectedEvaluators
125125
);

x-pack/solutions/security/packages/kbn-evals-suite-security-alert-triage/evals/bulk_alerts_attachment_read.spec.ts

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ import { tags } from '@kbn/scout';
2121
import {
2222
selectEvaluators,
2323
type DefaultEvaluators,
24+
type EvaluationDataset,
2425
type EvalsExecutorClient,
25-
type ExperimentTask,
26-
type TaskOutput,
26+
type Example,
2727
} from '@kbn/evals';
2828
import type { ToolingLog } from '@kbn/tooling-log';
2929
import type { HttpHandler } from '@kbn/core/public';
@@ -52,7 +52,7 @@ const alertBatches: Array<{ alertIds: string[] }> = Array.from(
5252

5353
// ── Types ──────────────────────────────────────────────────────────────────────
5454

55-
interface AlertEvalExample {
55+
interface AlertEvalExample extends Example {
5656
input: { question: string };
5757
output: { expected: string };
5858
metadata?: {
@@ -81,24 +81,25 @@ function createEvaluateAlertBatches({
8181
}: {
8282
dataset: { name: string; description: string; examples: AlertEvalExample[] };
8383
}) {
84-
const task: ExperimentTask<AlertEvalExample, TaskOutput> = async ({ input, metadata }) => {
85-
const attachments = metadata?.attachments ?? [];
86-
return callConverse({
87-
fetch,
88-
connectorId: connector.id,
89-
question: input.question,
90-
attachments,
91-
log,
92-
});
93-
};
94-
9584
const selectedEvaluators = selectEvaluators([
9685
attachmentReadCompliance,
9786
...Object.values(evaluators.traceBasedEvaluators),
9887
]);
9988

10089
await executorClient.runExperiment(
101-
{ dataset: { name, description, examples }, task },
90+
{
91+
datasets: [{ name, description, examples } satisfies EvaluationDataset],
92+
task: async ({ input, metadata }) => {
93+
const attachments = metadata?.attachments ?? [];
94+
return callConverse({
95+
fetch,
96+
connectorId: connector.id,
97+
question: input.question,
98+
attachments,
99+
log,
100+
});
101+
},
102+
},
102103
selectedEvaluators
103104
);
104105
};

0 commit comments

Comments
 (0)