Skip to content

Commit 7804055

Browse files
dgieselaar, kibanamachine, mistic, viduni94
authored
[Inference] Evaluations framework (elastic#227453)
Introduces `@kbn/evals` to enable offline evaluation suites for LLM-based workflows in Kibana, using:
- `@kbn/scout` and Playwright for running the evaluations
- `@kbn/inference-plugin` for handling LLM interactions
- [Phoenix](https://github.com/Arize-ai/phoenix) for storing evaluations

See: https://github.com/dgieselaar/kibana/blob/evaluation-runner/x-pack/platform/packages/shared/kbn-evals/README.md

Other changes in this PR:
- Implement support for `exports` in package.json in `@kbn/import-resolver` (this causes some eslint-disables to no longer be necessary)
- Graceful shutdown for OpenTelemetry span processors
- Add `scripts/playwright.js` script which sets up babel transforms, tracing etc

Notes:
- used o3 to implement support for `exports` (and then iterated on it)
- used o3 to generate a [README](https://github.com/dgieselaar/kibana/blob/evaluation-runner/x-pack/platform/packages/shared/kbn-evals/README.md) (and made some minor changes)

Dependencies:
- Added @arizeai/phoenix-client to interact with Phoenix's API. The alternative is using REST/GraphQL directly but there's a lot of client-side logic in the SDK.

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
Co-authored-by: Tiago Costa <tiago.costa@elastic.co>
Co-authored-by: Tiago Costa <tiagoffcc@hotmail.com>
Co-authored-by: Viduni Wickramarachchi <viduni.ushanka@gmail.com>
1 parent 8ea4ace commit 7804055

82 files changed

Lines changed: 3233 additions & 87 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.eslintrc.js

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -795,6 +795,7 @@ module.exports = {
795795
'x-pack/test/profiling_api_integration/**/*.ts',
796796
'x-pack/test/security_solution_api_integration/*/test_suites/**/*',
797797
'x-pack/test/security_solution_api_integration/**/config*.ts',
798+
'**/playwright.config.ts',
798799
],
799800
rules: {
800801
'import/no-default-export': 'off',

.github/CODEOWNERS

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -869,6 +869,7 @@ x-pack/platform/packages/shared/kbn-elastic-assistant @elastic/security-generati
869869
x-pack/platform/packages/shared/kbn-elastic-assistant-common @elastic/security-generative-ai
870870
x-pack/platform/packages/shared/kbn-elastic-assistant-shared-state @elastic/security-generative-ai
871871
x-pack/platform/packages/shared/kbn-entities-schema @elastic/obs-entities
872+
x-pack/platform/packages/shared/kbn-evals @elastic/appex-ai-infra
872873
x-pack/platform/packages/shared/kbn-event-stacktrace @elastic/obs-ux-infra_services-team @elastic/obs-ux-logs-team
873874
x-pack/platform/packages/shared/kbn-inference-cli @elastic/appex-ai-infra
874875
x-pack/platform/packages/shared/kbn-inference-endpoint-ui-common @elastic/appex-ai-infra
@@ -1051,6 +1052,7 @@ x-pack/solutions/observability/packages/alert-details @elastic/obs-ux-management
10511052
x-pack/solutions/observability/packages/alerting-test-data @elastic/obs-ux-management-team
10521053
x-pack/solutions/observability/packages/get-padded-alert-time-range-util @elastic/obs-ux-management-team
10531054
x-pack/solutions/observability/packages/kbn-alerts-grouping @elastic/response-ops
1055+
x-pack/solutions/observability/packages/kbn-evals-suite-obs-ai-assistant @elastic/obs-ai-assistant
10541056
x-pack/solutions/observability/packages/kbn-genai-cli @elastic/obs-knowledge-team
10551057
x-pack/solutions/observability/packages/kbn-observability-schema @elastic/obs-ux-management-team
10561058
x-pack/solutions/observability/packages/kbn-scout-oblt @elastic/appex-qa

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,9 @@ role_users.json
157157
# ignore Scout temp directory
158158
.scout
159159

160+
# Playwright
161+
**/test-results/.last-run.json
162+
160163
.devcontainer/.env
161164

162165
# Ignore temporary files in oas_docs

package.json

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1134,6 +1134,7 @@
11341134
"@opentelemetry/exporter-trace-otlp-http": "^0.203.0",
11351135
"@opentelemetry/exporter-trace-otlp-proto": "^0.203.0",
11361136
"@opentelemetry/instrumentation": "^0.203.0",
1137+
"@opentelemetry/instrumentation-http": "^0.203.0",
11371138
"@opentelemetry/instrumentation-undici": "^0.14.0",
11381139
"@opentelemetry/otlp-exporter-base": "^0.203.0",
11391140
"@opentelemetry/semantic-conventions": "^1.36.0",
@@ -1397,6 +1398,7 @@
13971398
},
13981399
"devDependencies": {
13991400
"@apidevtools/swagger-parser": "^12.0.0",
1401+
"@arizeai/phoenix-client": "^2.3.4",
14001402
"@babel/cli": "^7.24.7",
14011403
"@babel/core": "^7.24.7",
14021404
"@babel/eslint-parser": "^7.24.7",
@@ -1542,6 +1544,8 @@
15421544
"@kbn/eslint-plugin-i18n": "link:packages/kbn-eslint-plugin-i18n",
15431545
"@kbn/eslint-plugin-imports": "link:packages/kbn-eslint-plugin-imports",
15441546
"@kbn/eslint-plugin-telemetry": "link:packages/kbn-eslint-plugin-telemetry",
1547+
"@kbn/evals": "link:x-pack/platform/packages/shared/kbn-evals",
1548+
"@kbn/evals-suite-obs-ai-assistant": "link:x-pack/solutions/observability/packages/kbn-evals-suite-obs-ai-assistant",
15451549
"@kbn/expect": "link:src/platform/packages/shared/kbn-expect",
15461550
"@kbn/failed-test-reporter-cli": "link:packages/kbn-failed-test-reporter-cli",
15471551
"@kbn/find-used-node-modules": "link:packages/kbn-find-used-node-modules",
@@ -1942,6 +1946,7 @@
19421946
"recast": "^0.23.9",
19431947
"regenerate": "^1.4.0",
19441948
"resolve": "^1.22.0",
1949+
"resolve.exports": "^2.0.3",
19451950
"rxjs-marbles": "^7.0.1",
19461951
"sass-embedded": "^1.79.6",
19471952
"sass-loader": "^10.5.2",

renovate.json

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -197,7 +197,8 @@
197197
"cache-manager",
198198
"cache-manager-fs-hash",
199199
"keyv",
200-
"@types/cache-manager-fs-hash"
200+
"@types/cache-manager-fs-hash",
201+
"@arizeai/phoenix-client"
201202
],
202203
"reviewers": [
203204
"team:appex-ai-infra"
@@ -1932,6 +1933,25 @@
19321933
"minimumReleaseAge": "7 days",
19331934
"enabled": true
19341935
},
1936+
{
1937+
"groupName": "resolve.exports",
1938+
"matchDepNames": [
1939+
"resolve.exports"
1940+
],
1941+
"reviewers": [
1942+
"team:kibana-operations"
1943+
],
1944+
"matchBaseBranches": [
1945+
"main"
1946+
],
1947+
"labels": [
1948+
"Team:Operations",
1949+
"backport:all-open",
1950+
"release_note:skip"
1951+
],
1952+
"minimumReleaseAge": "7 days",
1953+
"enabled": true
1954+
},
19351955
{
19361956
"groupName": "mocha",
19371957
"matchDepNames": [
@@ -4357,6 +4377,7 @@
43574377
"@opentelemetry/exporter-trace-otlp-http",
43584378
"@opentelemetry/exporter-trace-otlp-proto",
43594379
"@opentelemetry/instrumentation",
4380+
"@opentelemetry/instrumentation-http",
43604381
"@opentelemetry/instrumentation-undici",
43614382
"@opentelemetry/otlp-exporter-base",
43624383
"@opentelemetry/semantic-conventions"

src/platform/packages/shared/kbn-scout/src/playwright/global_hooks/index.ts renamed to scripts/playwright.js

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,5 +7,5 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
export { ingestTestDataHook } from './data_ingestion';
11-
export { ingestSynthtraceDataHook } from './synthtrace_ingestion';
10+
require('./setup_playwright');
11+
require('@playwright/test/cli');

scripts/setup_playwright.js

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
/**
11+
* This file is intended to be required before Playwright runs. It is responsible
12+
* for:
13+
* - Calling setup_node_env which registers babel transforms, source map support,
14+
* hardening etc
15+
* - Initializing APM/OpenTelemetry
16+
* - Transforming logging-related command-line flags into an environment variable
17+
* so it doesn't interfere with Playwright's own command-line flags
18+
*/
19+
20+
/**
21+
* We disable node's version validation here as some IDEs will bundle their own
22+
* Node.js version for extensions, or there's no way to get it to use Kibana's
23+
* version, and it would cause the version validation check in setup_node_env to
24+
* fail if the versions are out of sync.
25+
*/
26+
process.env.UNSAFE_DISABLE_NODE_VERSION_VALIDATION = 'true';
27+
require('../src/setup_node_env');

src/cli/apm.js

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ const { initTelemetry } = require('@kbn/telemetry');
1515
const rootDir = join(__dirname, '../..');
1616
const isKibanaDistributable = Boolean(build && build.distributable === true);
1717

18-
module.exports = function (serviceName = name) {
19-
initApm(process.argv, rootDir, isKibanaDistributable, serviceName);
20-
initTelemetry(process.argv, rootDir, isKibanaDistributable, serviceName);
18+
module.exports = function (serviceName = name, argv = process.argv) {
19+
initApm(argv, rootDir, isKibanaDistributable, serviceName);
20+
initTelemetry(argv, rootDir, isKibanaDistributable, serviceName);
2121
};

src/core/packages/usage-data/server-internal/src/core_usage_data_service.test.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ import type { ConfigPath } from '@kbn/config';
1111
import { BehaviorSubject, Observable } from 'rxjs';
1212
import { TestScheduler } from 'rxjs/testing';
1313

14-
// eslint-disable-next-line @kbn/imports/no_unresolvable_imports
1514
import { HotObservable } from 'rxjs/internal/testing/HotObservable';
1615

1716
import { configServiceMock } from '@kbn/config-mocks';

src/dev/license_checker/config.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,4 +96,5 @@ export const LICENSE_OVERRIDES = {
9696
'language-subtag-registry@0.3.21': ['CC-BY-4.0'], // retired ODC‑By license https://github.com/mattcg/language-subtag-registry
9797
'buffers@0.1.1': ['MIT'], // license in importing module https://www.npmjs.com/package/binary
9898
'@bufbuild/protobuf@2.5.2': ['Apache-2.0'], // license (Apache-2.0 AND BSD-3-Clause)
99+
'@arizeai/phoenix-client@2.3.4': ['Elastic License 2.0'], // see https://github.com/Arize-ai/phoenix/blob/main/LICENSE
99100
};

0 commit comments

Comments
 (0)