From 3da04db54e8f3af8d63758de247c2de363251e27 Mon Sep 17 00:00:00 2001 From: Shahzad Date: Fri, 29 May 2026 18:56:26 +0200 Subject: [PATCH] [CI] Opt-in FTR solution-selective testing via PR labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce flaky-FTR noise on PRs whose changes are confined to a single solution. Two opt-in PR labels gate the behaviour: - ci:skip-unaffected-ftr-configs: drop FTR configs that belong to solutions the PR does not touch. - ci:soft-fail-unaffected-ftr-configs: keep those configs running but mark their failures non-blocking (warning annotations) so unrelated flakiness can't block the PR. Detection is deliberately conservative. The diff is only narrowed when every changed file (and every downstream dependent) is confined to one or more solutions' private code. Anything touching platform/shared modules, CI/test-infra, or FTR manifests — or a downstream platform consumer — runs the full, blocking suite exactly as today. On-merge builds always run the full suite. Implemented on top of the affected-packages module graph: kibana.jsonc `group` is now captured per module (getModuleGroup) and the downstream dependency graph is used to expand the affected set before deciding whether the change is solution-confined. 
Co-authored-by: Cursor --- .../affected-packages/README.md | 14 ++ .../pipeline-utils/affected-packages/index.ts | 1 + .../affected-packages/module_lookup.test.ts | 32 +++ .../affected-packages/module_lookup.ts | 21 +- .../pick_test_group_run_order/const.ts | 63 ++++++ .../env_config.test.ts | 31 ++- .../pick_test_group_run_order/env_config.ts | 20 +- .../ftr_manifests.test.ts | 65 ++++++ .../ftr_manifests.ts | 3 +- .../pick_test_group_run_order.ts | 89 +++++++- .../selective_ftr.test.ts | 191 ++++++++++++++++++ .../selective_ftr.ts | 173 ++++++++++++++++ .buildkite/scripts/steps/test/ftr_configs.sh | 40 +++- 13 files changed, 737 insertions(+), 6 deletions(-) create mode 100644 .buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/ftr_manifests.test.ts create mode 100644 .buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/selective_ftr.test.ts create mode 100644 .buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/selective_ftr.ts diff --git a/.buildkite/pipeline-utils/affected-packages/README.md b/.buildkite/pipeline-utils/affected-packages/README.md index af12c794967e4..098933b557178 100644 --- a/.buildkite/pipeline-utils/affected-packages/README.md +++ b/.buildkite/pipeline-utils/affected-packages/README.md @@ -133,3 +133,17 @@ const filteredFiles = filterFilesByPackages( ## PR Jest selective testing On pull request builds, Jest unit and integration test groups are narrowed to configs under affected packages (see `pick_test_group_run_order` in CI stats). Add the GitHub label `ci:prevent-selective-testing` to run the full Jest suite instead. Touching files listed in `CRITICAL_FILES_JEST_*` in `const.ts` also skips filtering for the relevant test type. + +## PR FTR solution-selective testing (opt-in) + +FTR configs are **not** narrowed by affected packages by default (every PR runs the full enabled manifest set). 
Two opt-in PR labels change how FTR behaves when a PR's diff is confined to one or more **solutions** (`observability`, `security`, `search`, `workplaceai`, `vectordb`): + +- `ci:skip-unaffected-ftr-configs` — drop the FTR configs of solutions the PR does not touch (only the touched solutions + `platform`/`base` manifests run). +- `ci:soft-fail-unaffected-ftr-configs` — keep running the untouched solutions' configs, but make their failures **non-blocking** (reported and annotated, but they don't fail the PR). + +If both labels are present, skip wins. The implementation lives in `pick_test_group_run_order/selective_ftr.ts` and uses the affected-packages utilities here: + +- A change is attributed to a solution by the module `group` from `kibana.jsonc` (via `getModuleGroup`) or, for files outside any module, by the `x-pack/solutions/<solution>/` path. +- The diff must be **fully** confined to solutions. The full FTR suite still runs (blocking) when any changed file maps to `platform`/shared or to no solution, when a downstream dependent lives in `platform`/an unknown group, or when a file in `CRITICAL_FILES_FTR` (`const.ts`) changes. Narrowing to the touched solutions is safe because solutions are `visibility: private` and cannot depend on one another. + +The non-blocking behaviour is enforced inside `.buildkite/scripts/steps/test/ftr_configs.sh`, which reads the `ftr_soft_fail_configs.json` artifact produced by `pick_test_group_run_order` and swallows (only) those configs' failures. 
diff --git a/.buildkite/pipeline-utils/affected-packages/index.ts b/.buildkite/pipeline-utils/affected-packages/index.ts index d9b32da5b3211..22afa901a6b56 100644 --- a/.buildkite/pipeline-utils/affected-packages/index.ts +++ b/.buildkite/pipeline-utils/affected-packages/index.ts @@ -14,6 +14,7 @@ import { getAffectedProjectsMoon } from './strategy_moon'; export * from './const'; export * from './utils'; export { listChangedFiles } from './strategy_git'; +export { findModuleForPath, getModuleGroup } from './module_lookup'; export interface AffectedPackagesConfig { strategy?: 'git' | 'moon'; diff --git a/.buildkite/pipeline-utils/affected-packages/module_lookup.test.ts b/.buildkite/pipeline-utils/affected-packages/module_lookup.test.ts index b3234f68106ae..b553da2d2b366 100644 --- a/.buildkite/pipeline-utils/affected-packages/module_lookup.test.ts +++ b/.buildkite/pipeline-utils/affected-packages/module_lookup.test.ts @@ -21,6 +21,7 @@ jest.mock('../utils', () => ({ import { getModuleLookup, findModuleForPath, + getModuleGroup, getModuleDependencies, buildModuleDownstreamGraph, resetModuleLookupCache, @@ -261,6 +262,37 @@ describe('module_lookup', () => { }); }); + describe('getModuleGroup', () => { + it('captures the `group` field from kibana.jsonc for every module', () => { + const { groupById } = getModuleLookup(); + for (const spec of MODULES) { + // createModule() writes `group: 'platform'` for all test modules + expect(groupById.get(spec.id)).toBe('platform'); + } + }); + + it('returns the group for a known module via getModuleGroup', () => { + expect(getModuleGroup('@kbn/core')).toBe('platform'); + }); + + it('returns undefined for an unknown module', () => { + expect(getModuleGroup('@kbn/does-not-exist')).toBeUndefined(); + }); + + it('omits modules whose kibana.jsonc has no group', () => { + const dir = path.join(tmpDir, 'packages', 'no-group'); + fs.mkdirSync(dir, { recursive: true }); + fs.writeFileSync( + path.join(dir, 'kibana.jsonc'), + 
JSON.stringify({ type: 'shared-common', id: '@kbn/no-group' }) + ); + commitAll(tmpDir, 'add module without group'); + + resetModuleLookupCache(); + expect(getModuleGroup('@kbn/no-group')).toBeUndefined(); + }); + }); + describe('findModuleForPath', () => { it('maps a deep file path to its containing module', () => { expect(findModuleForPath('packages/core/src/index.ts')).toBe('@kbn/core'); diff --git a/.buildkite/pipeline-utils/affected-packages/module_lookup.ts b/.buildkite/pipeline-utils/affected-packages/module_lookup.ts index 5225e0c3a5ed9..7cdad8d5501d7 100644 --- a/.buildkite/pipeline-utils/affected-packages/module_lookup.ts +++ b/.buildkite/pipeline-utils/affected-packages/module_lookup.ts @@ -23,6 +23,12 @@ export interface ModuleLookup { * `"@kbn/core-http-server-internal"` → `"src/core/packages/http/server-internal"` */ byId: Map; + /** + * `"@kbn/core-http-server-internal"` → `"platform"` (the `group` field from + * `kibana.jsonc`: `platform` or a solution name such as `observability`). + * Modules whose manifest omits `group` are absent from this map. + */ + groupById: Map; } let cachedModuleLookup: ModuleLookup | null = null; @@ -41,6 +47,7 @@ export function getModuleLookup(): ModuleLookup { const byDir = new Map(); const byId = new Map(); + const groupById = new Map(); for (const file of files) { if (file.includes('__fixtures__')) { @@ -52,13 +59,25 @@ export function getModuleLookup(): ModuleLookup { if (config.id && typeof config.id === 'string') { byDir.set(dir, config.id); byId.set(config.id, dir); + if (config.group && typeof config.group === 'string') { + groupById.set(config.id, config.group); + } } } - cachedModuleLookup = { byDir, byId }; + cachedModuleLookup = { byDir, byId, groupById }; return cachedModuleLookup; } +/** + * Returns the `group` declared in a module's `kibana.jsonc` (e.g. `platform` or + * a solution name like `observability`), or `undefined` when the module is + * unknown or declares no group. 
+ */ +export function getModuleGroup(moduleId: string): string | undefined { + return getModuleLookup().groupById.get(moduleId); +} + export function findModuleForPath(filePath: string): string | undefined { const lookup = getModuleLookup(); const normalizedFilePath = filePath.replace(/\\/g, '/'); diff --git a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/const.ts b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/const.ts index db417d825279d..5fda3f3a4ebe6 100644 --- a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/const.ts +++ b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/const.ts @@ -116,3 +116,66 @@ export const STEP_KEYS = { /** PR label that prevents selective testing. */ export const PREVENT_SELECTIVE_TESTS_LABEL = 'ci:prevent-selective-testing'; + +/** + * PR label that drops FTR configs belonging to solutions the PR does not touch. + * Only takes effect when the diff is confined to one or more solutions (no + * platform/shared/CI/test-infra changes); otherwise the full suite still runs. + */ +export const FTR_SKIP_UNAFFECTED_LABEL = 'ci:skip-unaffected-ftr-configs'; + +/** + * PR label that keeps running FTR configs of untouched solutions but makes their + * failures non-blocking (they no longer fail the PR). Same confinement gate as + * the skip label. If both labels are present, skip wins. + */ +export const FTR_SOFT_FAIL_UNAFFECTED_LABEL = 'ci:soft-fail-unaffected-ftr-configs'; + +/** The base `group` shared across solutions (from `kibana.jsonc`). */ +export const PLATFORM_GROUP = 'platform'; + +/** Solution `group` values (from `kibana.jsonc`), matching `VALID_SOLUTIONS`. */ +export const SOLUTION_GROUPS = [ + 'observability', + 'security', + 'search', + 'workplaceai', + 'vectordb', +] as const; + +/** + * Maps a solution `group` to the infix used in its FTR manifest filenames + * (`.buildkite/ftr-manifests/ftr_<infix>_*.yml`). 
Most match 1:1, but a few + * historical names differ (`observability`→`oblt`, `workplaceai`→`workplace_ai`). + */ +export const SOLUTION_MANIFEST_INFIX: Record = { + observability: 'oblt', + security: 'security', + search: 'search', + workplaceai: 'workplace_ai', + vectordb: 'vectordb', +}; + +/** + * Touching any of these forces the full FTR suite to run (blocking), even when + * the rest of the diff looks solution-scoped. Kept narrow: shared test harness, + * FTR base services, CI selection logic, and root toolchain files. Most of these + * already resolve to the `platform` group or `[uncategorized]` and would bail + * anyway — listing them makes the intent explicit and guards path edge-cases. + */ +export const CRITICAL_FILES_FTR = [ + 'package.json', + 'yarn.lock', + 'tsconfig.base.json', + 'tsconfig.json', + '.node-version', + '.nvmrc', + 'src/setup_node_env/**/*', + 'src/platform/packages/shared/kbn-test/**/*', + 'src/platform/packages/shared/kbn-ftr-common-functional-services/**/*', + 'src/platform/packages/shared/kbn-ftr-common-functional-ui-services/**/*', + '.buildkite/ftr-manifests/**/*', + '.buildkite/pipeline-utils/affected-packages/**/*.{ts,js,sh}', + '.buildkite/pipeline-utils/ci-stats/**/*.{ts,js}', + '.buildkite/scripts/steps/test/**/*', +]; diff --git a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/env_config.test.ts b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/env_config.test.ts index 1700f54d2612e..ba283fdc21974 100644 --- a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/env_config.test.ts +++ b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/env_config.test.ts @@ -12,7 +12,13 @@ jest.mock('#pipeline-utils', () => ({ collectEnvFromLabels: () => ({}), })); -import { MAX_MINUTES, PREVENT_SELECTIVE_TESTS_LABEL, RETRIES } from './const'; +import { + FTR_SKIP_UNAFFECTED_LABEL, + FTR_SOFT_FAIL_UNAFFECTED_LABEL, + MAX_MINUTES, + PREVENT_SELECTIVE_TESTS_LABEL, + RETRIES, +} from './const'; 
import { loadRunOrderConfig } from './env_config'; const TYPE_ENV = { @@ -55,6 +61,8 @@ describe('loadRunOrderConfig', () => { expect(cfg.ftrConfigsDeps).toEqual(['build']); expect(cfg.jestConfigsDeps).toEqual([]); expect(cfg.useSelectiveTesting).toBe(false); + expect(cfg.ftrSkipUnaffectedSolutions).toBe(false); + expect(cfg.ftrSoftFailUnaffectedSolutions).toBe(false); }); it('parses CSV envs and trims whitespace', () => { @@ -143,6 +151,27 @@ describe('loadRunOrderConfig', () => { expect(cfg.useSelectiveTesting).toBe(false); }); + it('enables FTR skip-unaffected when its label is present', () => { + process.env.GITHUB_PR_LABELS = `foo,${FTR_SKIP_UNAFFECTED_LABEL},bar`; + const cfg = loadRunOrderConfig(); + expect(cfg.ftrSkipUnaffectedSolutions).toBe(true); + expect(cfg.ftrSoftFailUnaffectedSolutions).toBe(false); + }); + + it('enables FTR soft-fail-unaffected when its label is present', () => { + process.env.GITHUB_PR_LABELS = `${FTR_SOFT_FAIL_UNAFFECTED_LABEL}`; + const cfg = loadRunOrderConfig(); + expect(cfg.ftrSoftFailUnaffectedSolutions).toBe(true); + expect(cfg.ftrSkipUnaffectedSolutions).toBe(false); + }); + + it('leaves both FTR selective flags off when no related label is present', () => { + process.env.GITHUB_PR_LABELS = 'some-other-label'; + const cfg = loadRunOrderConfig(); + expect(cfg.ftrSkipUnaffectedSolutions).toBe(false); + expect(cfg.ftrSoftFailUnaffectedSolutions).toBe(false); + }); + it('uses TEST_GROUP_TYPE_* overrides when provided', () => { process.env.TEST_GROUP_TYPE_UNIT = 'unit-type'; process.env.TEST_GROUP_TYPE_INTEGRATION = 'integration-type'; diff --git a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/env_config.ts b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/env_config.ts index 816260a003595..e17256fc8e61b 100644 --- a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/env_config.ts +++ b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/env_config.ts @@ -7,7 +7,13 @@ * License v3.0 
only", or the "Server Side Public License, v 1". */ -import { MAX_MINUTES, RETRIES, PREVENT_SELECTIVE_TESTS_LABEL } from './const'; +import { + MAX_MINUTES, + RETRIES, + PREVENT_SELECTIVE_TESTS_LABEL, + FTR_SKIP_UNAFFECTED_LABEL, + FTR_SOFT_FAIL_UNAFFECTED_LABEL, +} from './const'; import { collectEnvFromLabels, getRequiredEnv } from '#pipeline-utils'; const VALID_SOLUTIONS = ['observability', 'search', 'security', 'workplaceai', 'vectordb']; @@ -78,11 +84,23 @@ export function loadRunOrderConfig() { useSelectiveTesting: Boolean(process.env.GITHUB_PR_NUMBER) && !(parseCsvEnv('GITHUB_PR_LABELS') ?? []).includes(PREVENT_SELECTIVE_TESTS_LABEL), + + // Opt-in FTR solution-selective behaviour, driven by PR labels. These are + // independent of `useSelectiveTesting` (which only governs Jest) but still + // require a merge base to diff against. + ftrSkipUnaffectedSolutions: hasLabel(FTR_SKIP_UNAFFECTED_LABEL), + ftrSoftFailUnaffectedSolutions: hasLabel(FTR_SOFT_FAIL_UNAFFECTED_LABEL), + prMergeBase: process.env.GITHUB_PR_MERGE_BASE || undefined, prNumber: process.env.GITHUB_PR_NUMBER || undefined, } as const; } +/** True when the given label is present in `GITHUB_PR_LABELS`. */ +function hasLabel(label: string): boolean { + return (parseCsvEnv('GITHUB_PR_LABELS') ?? []).includes(label); +} + export type RunOrderConfig = ReturnType; function parseFloatEnv(name: string, defaultValue: number): number { diff --git a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/ftr_manifests.test.ts b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/ftr_manifests.test.ts new file mode 100644 index 0000000000000..77a59f71b8e43 --- /dev/null +++ b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/ftr_manifests.test.ts @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +import * as path from 'path'; + +import { SOLUTION_MANIFEST_INFIX } from './const'; +import { getEnabledFtrConfigs } from './ftr_manifests'; + +// repo root, relative to .buildkite/pipeline-utils/ci-stats/pick_test_group_run_order +const REPO_ROOT = path.resolve(__dirname, '../../../..'); + +describe('SOLUTION_MANIFEST_INFIX', () => { + it('maps each solution group to its manifest filename infix', () => { + expect(SOLUTION_MANIFEST_INFIX).toEqual({ + observability: 'oblt', + security: 'security', + search: 'search', + workplaceai: 'workplace_ai', + vectordb: 'vectordb', + }); + }); +}); + +describe('getEnabledFtrConfigs – solution filtering', () => { + const originalCwd = process.cwd(); + + beforeAll(() => { + // manifest paths in the JSON index are relative to the repo root + process.chdir(REPO_ROOT); + }); + + afterAll(() => { + process.chdir(originalCwd); + }); + + const flatten = (byQueue: Map) => Array.from(byQueue.values()).flat(); + + it('picks up workplace_ai manifest configs when filtering by "workplaceai"', () => { + // Regression guard: the old mapping only remapped observability→oblt, so + // `workplaceai` silently matched no manifest (ftr_workplaceai_ vs ftr_workplace_ai_). 
+ const { ftrConfigsByQueue } = getEnabledFtrConfigs(undefined, ['workplaceai']); + const configs = flatten(ftrConfigsByQueue); + expect(configs).toContain( + 'x-pack/solutions/workplaceai/test/serverless/functional/configs/config.ts' + ); + }); + + it('returns a strict subset of the full enabled set (other solution manifests dropped)', () => { + const all = flatten(getEnabledFtrConfigs(undefined, undefined).ftrConfigsByQueue); + const filtered = flatten(getEnabledFtrConfigs(undefined, ['workplaceai']).ftrConfigsByQueue); + + expect(filtered.length).toBeGreaterThan(0); + // filtering actually drops the other solutions' manifests… + expect(filtered.length).toBeLessThan(all.length); + // …and every retained config is part of the full enabled set + const allSet = new Set(all); + expect(filtered.every((c) => allSet.has(c))).toBe(true); + }); +}); diff --git a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/ftr_manifests.ts b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/ftr_manifests.ts index fa584f30f7456..85324589734f4 100644 --- a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/ftr_manifests.ts +++ b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/ftr_manifests.ts @@ -13,6 +13,7 @@ import minimatch from 'minimatch'; import { parse as loadYaml } from 'yaml'; import { serverless, stateful } from '../../../ftr-manifests/ftr_configs_manifests.json'; +import { SOLUTION_MANIFEST_INFIX } from './const'; import type { FtrConfigsManifest } from './types'; const ALL_FTR_MANIFEST_REL_PATHS = serverless.concat(stateful); @@ -32,7 +33,7 @@ export function getEnabledFtrConfigs( } = { enabled: [], defaultQueue: undefined }; const uniqueQueues = new Set(); - const mappedSolutions = solutions?.map((s) => (s === 'observability' ? 'oblt' : s)); + const mappedSolutions = solutions?.map((s) => SOLUTION_MANIFEST_INFIX[s] ?? 
s); for (const manifestRelPath of ALL_FTR_MANIFEST_REL_PATHS) { if ( mappedSolutions && diff --git a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/pick_test_group_run_order.ts b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/pick_test_group_run_order.ts index bbd11775c90d5..7892153a34446 100644 --- a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/pick_test_group_run_order.ts +++ b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/pick_test_group_run_order.ts @@ -17,10 +17,11 @@ import { CiStatsClient } from '../client'; import { buildCiStatsGroups, buildCiStatsSources } from './ci_stats_sources'; import { AGENT_DISK_GIB, DURATION_PERCENTILE, STEP_KEYS } from './const'; -import { loadRunOrderConfig } from './env_config'; +import { loadRunOrderConfig, type RunOrderConfig } from './env_config'; import { getEnabledFtrConfigs } from './ftr_manifests'; import { discoverJestIntegrationConfigs, discoverJestUnitConfigs } from './jest_configs'; import { getRunGroup, getRunGroups, labelJestSubgroups } from './run_groups'; +import { diffSoftFailConfigs, resolveAffectedFtrSolutions } from './selective_ftr'; import { isScoutTestsOnlyDiff } from './selective_scout'; import { filterJestIntegrationConfigsByAffected, @@ -77,6 +78,14 @@ export async function pickTestGroupRunOrder() { ); if (!ftrConfigsIncluded) ftrConfigsByQueue.clear(); + // Opt-in FTR solution-selective behaviour (PR labels). When the diff is + // confined to one or more solutions we either drop the untouched solutions' + // configs (skip) or keep running them as non-blocking (soft-fail). Otherwise + // the full enabled suite runs, blocking, exactly as before. + const ftrSoftFailConfigs = ftrConfigsIncluded + ? 
await applyFtrSolutionSelection(bk, config, ftrConfigsByQueue) + : []; + if (selectiveTestingMergeBase) { const selectiveCtx = await resolveSelectiveTestingContext(selectiveTestingMergeBase); if (selectiveCtx !== null) { @@ -128,6 +137,11 @@ export async function pickTestGroupRunOrder() { if (ftrConfigsIncluded) { Fs.writeFileSync('ftr_run_order.json', JSON.stringify(ftrRunOrder, null, 2)); bk.uploadArtifacts('ftr_run_order.json'); + + // Always emitted (possibly empty) so the FTR job can unconditionally decide + // which configs are non-blocking without guessing whether the file exists. + Fs.writeFileSync('ftr_soft_fail_configs.json', JSON.stringify(ftrSoftFailConfigs, null, 2)); + bk.uploadArtifacts('ftr_soft_fail_configs.json'); } const steps: BuildkiteStep[] = [ @@ -174,6 +188,79 @@ export async function pickTestGroupRunOrder() { bk.uploadSteps(steps); } +/** + * Apply the opt-in FTR solution-selective labels to the enabled config set. + * + * - `ci:skip-unaffected-ftr-configs`: mutate `ftrConfigsByQueue` in place to + * keep only the touched solutions (+ platform/base); returns `[]`. + * - `ci:soft-fail-unaffected-ftr-configs`: leave `ftrConfigsByQueue` untouched + * (everything still runs) and return the untouched-solution config paths that + * the FTR job should treat as non-blocking. + * + * When neither label is set, no merge base is available, or the diff cannot be + * narrowed to solutions, nothing changes and `[]` is returned (full blocking + * suite). If both labels are set, skip wins. 
+ */ +async function applyFtrSolutionSelection( + bk: BuildkiteClient, + config: RunOrderConfig, + ftrConfigsByQueue: Map +): Promise { + const skip = config.ftrSkipUnaffectedSolutions; + const softFail = config.ftrSoftFailUnaffectedSolutions; + if (!skip && !softFail) return []; + + if (!config.prMergeBase) { + bk.setAnnotation( + 'ftr-selective-testing', + 'warning', + 'FTR selective testing label set but no merge base is available — running the full FTR suite (blocking).' + ); + return []; + } + + const { solutions, reason } = await resolveAffectedFtrSolutions(config.prMergeBase); + if (!solutions) { + bk.setAnnotation( + 'ftr-selective-testing', + 'info', + `FTR selective testing not applied — running the full FTR suite (blocking). Reason: ${reason}.` + ); + return []; + } + + const solutionList = [...solutions].sort(); + const { ftrConfigsByQueue: blockingByQueue } = getEnabledFtrConfigs( + config.ftrConfigPatterns, + solutionList + ); + + // `skip` wins when both labels are present. + if (skip) { + ftrConfigsByQueue.clear(); + for (const [queue, names] of blockingByQueue) { + ftrConfigsByQueue.set(queue, names); + } + bk.setAnnotation( + 'ftr-selective-testing', + 'info', + `FTR selective testing: skipping configs of untouched solutions. ${reason}. ` + + `Running solution(s) [${solutionList.join(', ')}] + platform/base only.` + ); + return []; + } + + const softFailConfigs = diffSoftFailConfigs(ftrConfigsByQueue, blockingByQueue); + bk.setAnnotation( + 'ftr-selective-testing', + 'info', + `FTR selective testing: ${softFailConfigs.length} config(s) of untouched solutions are non-blocking ` + + `(they still run, but their failures won't fail this PR). ${reason}. ` + + `Blocking solution(s) [${solutionList.join(', ')}] + platform/base.` + ); + return softFailConfigs; +} + /** * Throws an error if the variable's value is missing at runtime. 
*/ diff --git a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/selective_ftr.test.ts b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/selective_ftr.test.ts new file mode 100644 index 0000000000000..cb0e5c4e0d53d --- /dev/null +++ b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/selective_ftr.test.ts @@ -0,0 +1,191 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +jest.mock('../../affected-packages', () => { + const actual = jest.requireActual('../../affected-packages'); + return { + ...actual, + listChangedFiles: jest.fn(), + getAffectedPackages: jest.fn(), + getModuleGroup: jest.fn(), + findModuleForPath: jest.fn(), + }; +}); + +import { + findModuleForPath, + getAffectedPackages, + getModuleGroup, + listChangedFiles, + UNCATEGORIZED_MODULE_ID, +} from '../../affected-packages'; +import { + diffSoftFailConfigs, + flattenConfigPaths, + resolveAffectedFtrSolutions, +} from './selective_ftr'; + +const mockListChangedFiles = listChangedFiles as jest.MockedFunction; +const mockGetAffectedPackages = getAffectedPackages as jest.MockedFunction< + typeof getAffectedPackages +>; +const mockGetModuleGroup = getModuleGroup as jest.MockedFunction; +const mockFindModuleForPath = findModuleForPath as jest.MockedFunction; + +const MERGE_BASE = 'abc123'; + +describe('resolveAffectedFtrSolutions', () => { + beforeEach(() => { + jest.resetAllMocks(); + // Default: changed files map to no module (uncategorized) unless overridden. 
+ mockFindModuleForPath.mockReturnValue(UNCATEGORIZED_MODULE_ID); + mockGetAffectedPackages.mockResolvedValue(new Set()); + mockGetModuleGroup.mockReturnValue(undefined); + }); + + it('bails (null) when there are no changed files', async () => { + mockListChangedFiles.mockReturnValue([]); + const { solutions, reason } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions).toBeNull(); + expect(reason).toMatch(/no changed files/); + }); + + it('bails when a critical shared/CI file changed', async () => { + mockListChangedFiles.mockReturnValue([ + 'package.json', + 'x-pack/solutions/observability/plugins/slo/server/x.ts', + ]); + const { solutions, reason } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions).toBeNull(); + expect(reason).toMatch(/critical/); + }); + + it('narrows to a solution attributed purely by path (no owning module)', async () => { + mockListChangedFiles.mockReturnValue([ + 'x-pack/solutions/observability/test/functional/apps/foo/config.ts', + ]); + const { solutions } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions && [...solutions]).toEqual(['observability']); + }); + + it('narrows to a solution attributed by module group', async () => { + mockListChangedFiles.mockReturnValue([ + 'x-pack/solutions/security/plugins/security_solution/server/x.ts', + ]); + mockGetAffectedPackages.mockResolvedValue(new Set(['@kbn/security-solution-plugin'])); + mockGetModuleGroup.mockImplementation((id) => + id === '@kbn/security-solution-plugin' ? 
'security' : undefined + ); + const { solutions } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions && [...solutions]).toEqual(['security']); + }); + + it('collects multiple solutions when the diff spans them', async () => { + mockListChangedFiles.mockReturnValue([ + 'x-pack/solutions/observability/plugins/slo/server/a.ts', + 'x-pack/solutions/search/plugins/enterprise_search/server/b.ts', + ]); + const { solutions } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions && [...solutions].sort()).toEqual(['observability', 'search']); + }); + + it('bails when a changed file belongs to the platform group', async () => { + mockListChangedFiles.mockReturnValue(['src/core/server/http/router.ts']); + mockFindModuleForPath.mockReturnValue('@kbn/core-http-server-internal'); + mockGetModuleGroup.mockReturnValue('platform'); + const { solutions, reason } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions).toBeNull(); + expect(reason).toMatch(/outside any solution/); + }); + + it('bails when a downstream dependent lives in the platform group', async () => { + mockListChangedFiles.mockReturnValue([ + 'x-pack/solutions/observability/packages/shared-utils/index.ts', + ]); + // file attributes to observability by path; downstream pulls in a platform consumer + mockGetAffectedPackages.mockResolvedValue( + new Set(['@kbn/obs-shared-utils', '@kbn/some-platform-consumer']) + ); + mockGetModuleGroup.mockImplementation((id) => + id === '@kbn/obs-shared-utils' ? 
'observability' : 'platform' + ); + const { solutions, reason } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions).toBeNull(); + expect(reason).toMatch(/platform\/shared module affected/); + }); + + it('bails when a downstream module has an unrecognized group', async () => { + mockListChangedFiles.mockReturnValue([ + 'x-pack/solutions/search/plugins/enterprise_search/server/a.ts', + ]); + mockGetAffectedPackages.mockResolvedValue(new Set(['@kbn/mystery'])); + mockGetModuleGroup.mockReturnValue('galaxy'); + const { solutions, reason } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions).toBeNull(); + expect(reason).toMatch(/unrecognized group/); + }); + + it('expands solutions via the downstream graph (cross-solution dependent)', async () => { + mockListChangedFiles.mockReturnValue([ + 'x-pack/solutions/observability/packages/shared/index.ts', + ]); + mockGetAffectedPackages.mockResolvedValue( + new Set(['@kbn/obs-shared', '@kbn/some-security-consumer']) + ); + mockGetModuleGroup.mockImplementation((id) => + id === '@kbn/obs-shared' ? 
'observability' : 'security' + ); + const { solutions } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions && [...solutions].sort()).toEqual(['observability', 'security']); + }); + + it('bails when affected-package detection throws', async () => { + mockListChangedFiles.mockReturnValue([ + 'x-pack/solutions/observability/plugins/slo/server/a.ts', + ]); + mockGetAffectedPackages.mockRejectedValue(new Error('boom')); + const { solutions, reason } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions).toBeNull(); + expect(reason).toMatch(/affected-package detection failed/); + }); + + it('bails when a file lives under x-pack/solutions but in an unknown solution dir', async () => { + mockListChangedFiles.mockReturnValue(['x-pack/solutions/made_up/plugins/foo/a.ts']); + const { solutions, reason } = await resolveAffectedFtrSolutions(MERGE_BASE); + expect(solutions).toBeNull(); + expect(reason).toMatch(/outside any solution/); + }); +}); + +describe('flattenConfigPaths', () => { + it('flattens a queue→configs map', () => { + const map = new Map([ + ['q1', ['a', 'b']], + ['q2', ['c']], + ]); + expect(flattenConfigPaths(map).sort()).toEqual(['a', 'b', 'c']); + }); +}); + +describe('diffSoftFailConfigs', () => { + it('returns configs present in full but not in blocking', () => { + const full = new Map([ + ['q1', ['platform/a', 'oblt/b', 'security/c']], + ['q2', ['search/d']], + ]); + const blocking = new Map([['q1', ['platform/a', 'oblt/b']]]); + expect(diffSoftFailConfigs(full, blocking).sort()).toEqual(['search/d', 'security/c']); + }); + + it('returns empty when blocking already covers everything', () => { + const full = new Map([['q1', ['a', 'b']]]); + const blocking = new Map([['q1', ['a', 'b']]]); + expect(diffSoftFailConfigs(full, blocking)).toEqual([]); + }); +}); diff --git a/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/selective_ftr.ts b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/selective_ftr.ts new 
file mode 100644 index 0000000000000..c89f6ca229a6d --- /dev/null +++ b/.buildkite/pipeline-utils/ci-stats/pick_test_group_run_order/selective_ftr.ts @@ -0,0 +1,173 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +import { + findModuleForPath, + getAffectedPackages, + getModuleGroup, + listChangedFiles, + touchedCriticalFiles, + UNCATEGORIZED_MODULE_ID, +} from '../../affected-packages'; +import { CRITICAL_FILES_FTR, PLATFORM_GROUP, SOLUTION_GROUPS } from './const'; + +const SOLUTION_GROUP_SET: ReadonlySet = new Set(SOLUTION_GROUPS); + +/** + * `x-pack/solutions//...` — used to attribute changes that fall + * outside any registered module (e.g. functional test dirs that have no + * `kibana.jsonc`) to the owning solution by directory. + */ +const SOLUTION_PATH_RE = /^x-pack\/solutions\/([^/]+)\//; + +export interface FtrSolutionSelection { + /** + * The solution `group`s the PR is confined to, or `null` when the diff cannot + * be safely narrowed to solutions — in which case callers must run the full + * FTR suite (blocking), exactly as today. + */ + solutions: Set | null; + /** Human-readable explanation, surfaced as a Buildkite annotation. */ + reason: string; +} + +/** + * Decide which solution(s) a PR's diff is confined to, for FTR selection. 
+ * + * Returns `null` (run everything, blocking) whenever the change could plausibly + * affect more than the touched solutions: + * - a critical shared/CI/test-infra file changed, + * - any changed file maps to the `platform` group or to no solution at all, + * - a downstream dependent of a changed module lives in `platform` or an + * unrecognized group. + * + * This is intentionally conservative: solutions are `visibility: private` and + * cannot depend on one another, so a diff that stays inside a single solution's + * code cannot break another solution — but anything touching shared/platform + * code can, so we fall back to the full suite there. + */ +export async function resolveAffectedFtrSolutions( + mergeBase: string +): Promise { + let changedFiles: string[]; + try { + changedFiles = listChangedFiles({ mergeBase, commit: 'HEAD' }); + } catch (error) { + return bail(`unable to list changed files (${errorMessage(error)})`); + } + + if (changedFiles.length === 0) { + return bail('no changed files detected'); + } + + if (touchedCriticalFiles(changedFiles, CRITICAL_FILES_FTR)) { + return bail('critical shared/CI/test-infra files changed'); + } + + const solutions = new Set(); + + // 1) Attribute every changed file to a solution. Any file that isn't owned by + // a solution (platform, shared, root tooling, …) disqualifies narrowing. + for (const file of changedFiles) { + const group = solutionForFile(file); + if (group === null) { + return bail(`change outside any solution: ${file}`); + } + solutions.add(group); + } + + // 2) Expand through the downstream dependency graph. If a changed module is + // consumed by a module in another group, that group must be covered too; + // a platform/shared consumer means the change can reach everything. + let affected: Set; + try { + affected = await getAffectedPackages(mergeBase, { + strategy: 'git', + includeDownstream: true, + ignorePatterns: [], + // Files outside modules are handled by the path-based loop above. 
+ ignoreUncategorizedChanges: true, + }); + } catch (error) { + return bail(`affected-package detection failed (${errorMessage(error)})`); + } + + for (const moduleId of affected) { + if (moduleId === UNCATEGORIZED_MODULE_ID) { + continue; + } + const group = getModuleGroup(moduleId); + if (!group || group === PLATFORM_GROUP) { + return bail(`platform/shared module affected: ${moduleId}`); + } + if (!SOLUTION_GROUP_SET.has(group)) { + return bail(`unrecognized group "${group}" for module ${moduleId}`); + } + solutions.add(group); + } + + if (solutions.size === 0) { + return bail('no affected solutions resolved'); + } + + return { + solutions, + reason: `diff confined to solution(s): ${[...solutions].sort().join(', ')}`, + }; +} + +/** + * Flatten the queue→configs map into a single list of config paths. + */ +export function flattenConfigPaths(byQueue: Map): string[] { + return Array.from(byQueue.values()).flat(); +} + +/** + * Config paths present in `full` but not in `blocking` — i.e. the configs that + * belong to untouched solutions and should be made non-blocking (soft-fail). + */ +export function diffSoftFailConfigs( + full: Map, + blocking: Map +): string[] { + const blockingSet = new Set(flattenConfigPaths(blocking)); + return flattenConfigPaths(full).filter((path) => !blockingSet.has(path)); +} + +/** + * Resolve a changed file to its owning solution `group`, or `null` when it is + * not owned by a solution (platform/shared/uncategorized). + */ +function solutionForFile(file: string): string | null { + const solutionMatch = SOLUTION_PATH_RE.exec(file); + if (solutionMatch) { + const dir = solutionMatch[1]; + return SOLUTION_GROUP_SET.has(dir) ? 
dir : null; + } + + const moduleId = findModuleForPath(file); + if (!moduleId || moduleId === UNCATEGORIZED_MODULE_ID) { + return null; + } + + const group = getModuleGroup(moduleId); + if (!group || group === PLATFORM_GROUP || !SOLUTION_GROUP_SET.has(group)) { + return null; + } + return group; +} + +function bail(reason: string): FtrSolutionSelection { + return { solutions: null, reason }; +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh index a14283ef5c7df..3fe7441fbbf90 100755 --- a/.buildkite/scripts/steps/test/ftr_configs.sh +++ b/.buildkite/scripts/steps/test/ftr_configs.sh @@ -44,6 +44,26 @@ if [ "$configs" == "" ]; then exit 1 fi +# Configs belonging to solutions the PR does not touch can be made non-blocking +# via the `ci:soft-fail-unaffected-ftr-configs` label: they still run, but their +# failures are only reported/annotated and do not fail this step. The list is +# produced by pick_test_group_run_order and uploaded as an artifact (it is +# always present, possibly empty, for builds that go through the orchestrator). +softFailConfigs="" +if [ "$FTR_CONFIG_GROUP_KEY" != "" ]; then + # Mirror the ftr_run_order.json download (GCS first, Buildkite-artifact + # fallback). `|| true` keeps a genuine miss (non-orchestrator pipelines) from + # tripping `set -e`; we then guard on the file actually existing. + download_tmp_artifact ftr_soft_fail_configs.json . 
"$BUILDKITE_BUILD_ID" || true + if [ -f ftr_soft_fail_configs.json ]; then + softFailConfigs=$(jq -r '.[]' ftr_soft_fail_configs.json 2>/dev/null || echo "") + if [[ "$softFailConfigs" ]]; then + echo "--- The following FTR configs are non-blocking (untouched solutions):" + echo "$softFailConfigs" + fi + fi +fi + failedConfigs="" results=() @@ -54,6 +74,12 @@ while read -r config; do FULL_COMMAND="node scripts/functional_tests --bail --config $config $EXTRA_ARGS" + # Is this config non-blocking (belongs to a solution the PR doesn't touch)? + isSoftFail="false" + if [[ "$softFailConfigs" ]] && grep -Fxq -- "$config" <<< "$softFailConfigs"; then + isSoftFail="true" + fi + # see if this config has already been executed successfully CONFIG_EXECUTION_KEY="${config}_executed" IS_CONFIG_EXECUTION=$(buildkite-agent meta-data get "$CONFIG_EXECUTION_KEY" --default "false" --log-level error) @@ -117,13 +143,25 @@ while read -r config; do duration="${timeSec}s" fi + resultLabel="${lastCode}" + if [[ "$isSoftFail" == "true" && $lastCode -ne 0 ]]; then + resultLabel="${lastCode} (non-blocking)" + fi + results+=("- $config duration: ${duration} - result: ${lastCode}") + result: ${resultLabel}") if [ $lastCode -eq 0 ]; then # Test was successful, so mark it as executed buildkite-agent meta-data set "$CONFIG_EXECUTION_KEY" "true" + elif [[ "$isSoftFail" == "true" ]]; then + # Non-blocking config: report the failure but don't fail the step or queue + # it for the failed-only retry. + echo "⚠️ Non-blocking FTR config failed with code $lastCode (untouched solution): $config" + echo "^^^ +++" + buildkite-agent annotate --style "warning" --context "ftr-non-blocking" --append "⚠️ Non-blocking FTR failure: \`${config}\` (exit ${lastCode}) +" || true else exitCode=10 echo "FTR exited with code $lastCode"