Skip to content

Commit dd70d94

Browse files
[9.4] [Entity Store] Cap window size (#268170) (#268446)
# Backport This will backport the following commits from `main` to `9.4`: - [[Entity Store] Cap window size (#268170)](#268170) <!--- Backport version: 11.0.2 --> ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sorenlouv/backport) <!--BACKPORT [{"author":{"name":"Rômulo Farias","email":"romulo.farias@elastic.co"},"sourceCommit":{"committedDate":"2026-05-08T13:18:48Z","message":"[Entity Store] Cap window size (#268170)\n\nIn lagging environments the entity-store extraction window grows\nunboundedly. `getExtractionWindow` always sets `toDateISO = now - delay`\nwhile `fromDateISO` advances only via `lastExecutionTimestamp /\npaginationTimestamp`. If a run cannot keep up, each subsequent run sees\na wider window. The probe (`buildLogPaginationCursorProbeEsql`) sorts\nevery doc in that window in ES|QL, so probe cost grows with window size\n— feeding a death-spiral where slow runs widen the window which slows\nthe next run further.\n\nWe want a hard cap on the width of each probe's window so that probe\ncost stays bounded regardless of how far behind the engine is. The cap\nis purely a cost-bounding device for the probe — it does not\nartificially defer catch-up to a later run.\n\nWithin a single extractLogs execution, once a capped sub-window is\ndrained we immediately advance to the next sub-window and continue,\nuntil we reach the effective window end (now - delay). Only when a run\nis interrupted (crash, abort, hitting a slow probe) do we resume on the\nnext scheduled run from the last persisted lastExecutionTimestamp.\n\n### How it works\n\nWhen the gap between `fromDateISO` and the effective window end (`now -\ndelay`) exceeds `maxTimeWindowSize + GRACE_PERIOD` (default `15m +\n30s`), the run processes the time range as a sequence of capped\n`[fromSub, toSub]` sub-windows of width `maxTimeWindowSize`, advancing\nwithin a single execution until the effective end is reached.\nSub-windows are an in-memory iteration concept — the saved-object schema\nis unaware of them. Crash recovery uses the per-slice persistence\nemitted by the inner outer-loop (last `paginationTimestamp` /\n`checkpointTimestamp` written). Manual `specificWindow` /\n`windowOverride` runs bypass capping and run as a single pass.\n\n\n### Added\n\n- `maxTimeWindowSize` parameter to the global configuration, available\non install and update paths. Also exposed via status api\n\n\n### Why default 15m\n\n`15m` seems to be an ok cap based on the default `3h` look back period.\nA too short `1m` will cause 180 queries to elasticsearch. `15m` will\ncause only 12.\n\nThis will need to be configured on heavy environments where `15m` worth\nof data account for millions of logs.\n\n---------\n\nCo-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>","sha":"555237a0dba491c31901c1b3323f667ecb580a0d","branchLabelMapping":{"^v9.5.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:fix","backport:version","v9.4.0","reviewer:macroscope","v9.5.0","v9.4.1"],"title":"[Entity Store] Cap window size","number":268170,"url":"https://github.com/elastic/kibana/pull/268170","mergeCommit":{"message":"[Entity Store] Cap window size (#268170)\n\nIn lagging environments the entity-store extraction window grows\nunboundedly. `getExtractionWindow` always sets `toDateISO = now - delay`\nwhile `fromDateISO` advances only via `lastExecutionTimestamp /\npaginationTimestamp`. If a run cannot keep up, each subsequent run sees\na wider window. The probe (`buildLogPaginationCursorProbeEsql`) sorts\nevery doc in that window in ES|QL, so probe cost grows with window size\n— feeding a death-spiral where slow runs widen the window which slows\nthe next run further.\n\nWe want a hard cap on the width of each probe's window so that probe\ncost stays bounded regardless of how far behind the engine is. The cap\nis purely a cost-bounding device for the probe — it does not\nartificially defer catch-up to a later run.\n\nWithin a single extractLogs execution, once a capped sub-window is\ndrained we immediately advance to the next sub-window and continue,\nuntil we reach the effective window end (now - delay). Only when a run\nis interrupted (crash, abort, hitting a slow probe) do we resume on the\nnext scheduled run from the last persisted lastExecutionTimestamp.\n\n### How it works\n\nWhen the gap between `fromDateISO` and the effective window end (`now -\ndelay`) exceeds `maxTimeWindowSize + GRACE_PERIOD` (default `15m +\n30s`), the run processes the time range as a sequence of capped\n`[fromSub, toSub]` sub-windows of width `maxTimeWindowSize`, advancing\nwithin a single execution until the effective end is reached.\nSub-windows are an in-memory iteration concept — the saved-object schema\nis unaware of them. Crash recovery uses the per-slice persistence\nemitted by the inner outer-loop (last `paginationTimestamp` /\n`checkpointTimestamp` written). Manual `specificWindow` /\n`windowOverride` runs bypass capping and run as a single pass.\n\n\n### Added\n\n- `maxTimeWindowSize` parameter to the global configuration, available\non install and update paths. Also exposed via status api\n\n\n### Why default 15m\n\n`15m` seems to be an ok cap based on the default `3h` look back period.\nA too short `1m` will cause 180 queries to elasticsearch. `15m` will\ncause only 12.\n\nThis will need to be configured on heavy environments where `15m` worth\nof data account for millions of logs.\n\n---------\n\nCo-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>","sha":"555237a0dba491c31901c1b3323f667ecb580a0d"}},"sourceBranch":"main","suggestedTargetBranches":["9.4"],"targetPullRequestStates":[{"branch":"9.4","label":"v9.4.0","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v9.5.0","branchLabelMappingKey":"^v9.5.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/268170","number":268170,"mergeCommit":{"message":"[Entity Store] Cap window size (#268170)\n\nIn lagging environments the entity-store extraction window grows\nunboundedly. `getExtractionWindow` always sets `toDateISO = now - delay`\nwhile `fromDateISO` advances only via `lastExecutionTimestamp /\npaginationTimestamp`. If a run cannot keep up, each subsequent run sees\na wider window. The probe (`buildLogPaginationCursorProbeEsql`) sorts\nevery doc in that window in ES|QL, so probe cost grows with window size\n— feeding a death-spiral where slow runs widen the window which slows\nthe next run further.\n\nWe want a hard cap on the width of each probe's window so that probe\ncost stays bounded regardless of how far behind the engine is. The cap\nis purely a cost-bounding device for the probe — it does not\nartificially defer catch-up to a later run.\n\nWithin a single extractLogs execution, once a capped sub-window is\ndrained we immediately advance to the next sub-window and continue,\nuntil we reach the effective window end (now - delay). Only when a run\nis interrupted (crash, abort, hitting a slow probe) do we resume on the\nnext scheduled run from the last persisted lastExecutionTimestamp.\n\n### How it works\n\nWhen the gap between `fromDateISO` and the effective window end (`now -\ndelay`) exceeds `maxTimeWindowSize + GRACE_PERIOD` (default `15m +\n30s`), the run processes the time range as a sequence of capped\n`[fromSub, toSub]` sub-windows of width `maxTimeWindowSize`, advancing\nwithin a single execution until the effective end is reached.\nSub-windows are an in-memory iteration concept — the saved-object schema\nis unaware of them. Crash recovery uses the per-slice persistence\nemitted by the inner outer-loop (last `paginationTimestamp` /\n`checkpointTimestamp` written). Manual `specificWindow` /\n`windowOverride` runs bypass capping and run as a single pass.\n\n\n### Added\n\n- `maxTimeWindowSize` parameter to the global configuration, available\non install and update paths. Also exposed via status api\n\n\n### Why default 15m\n\n`15m` seems to be an ok cap based on the default `3h` look back period.\nA too short `1m` will cause 180 queries to elasticsearch. `15m` will\ncause only 12.\n\nThis will need to be configured on heavy environments where `15m` worth\nof data account for millions of logs.\n\n---------\n\nCo-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>","sha":"555237a0dba491c31901c1b3323f667ecb580a0d"}}]}] BACKPORT--> --------- Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
1 parent d0ba5e1 commit dd70d94

19 files changed

Lines changed: 1053 additions & 161 deletions

File tree

oas_docs/output/kibana.serverless.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62714,6 +62714,10 @@ paths:
6271462714
maximum: 9007199254740991
6271562715
minimum: 1
6271662716
type: integer
62717+
excludedIndexPatterns:
62718+
items:
62719+
type: string
62720+
type: array
6271762721
fieldHistoryLength:
6271862722
maximum: 9007199254740991
6271962723
minimum: -9007199254740991
@@ -62730,6 +62734,9 @@ paths:
6273062734
maximum: 9007199254740991
6273162735
minimum: 1
6273262736
type: integer
62737+
maxTimeWindowSize:
62738+
pattern: '[smdh]$'
62739+
type: string
6273362740
required:
6273462741
- logExtraction
6273562742
responses:
@@ -70346,6 +70353,11 @@ paths:
7034670353
maximum: 9007199254740991
7034770354
minimum: 1
7034870355
type: integer
70356+
excludedIndexPatterns:
70357+
default: []
70358+
items:
70359+
type: string
70360+
type: array
7034970361
fieldHistoryLength:
7035070362
default: 10
7035170363
maximum: 9007199254740991
@@ -70367,6 +70379,10 @@ paths:
7036770379
maximum: 9007199254740991
7036870380
minimum: 1
7036970381
type: integer
70382+
maxTimeWindowSize:
70383+
default: 15m
70384+
pattern: '[smdh]$'
70385+
type: string
7037070386
responses:
7037170387
'200':
7037270388
content:

oas_docs/output/kibana.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66707,6 +66707,10 @@ paths:
6670766707
maximum: 9007199254740991
6670866708
minimum: 1
6670966709
type: integer
66710+
excludedIndexPatterns:
66711+
items:
66712+
type: string
66713+
type: array
6671066714
fieldHistoryLength:
6671166715
maximum: 9007199254740991
6671266716
minimum: -9007199254740991
@@ -66723,6 +66727,9 @@ paths:
6672366727
maximum: 9007199254740991
6672466728
minimum: 1
6672566729
type: integer
66730+
maxTimeWindowSize:
66731+
pattern: '[smdh]$'
66732+
type: string
6672666733
required:
6672766734
- logExtraction
6672866735
responses:
@@ -74339,6 +74346,11 @@ paths:
7433974346
maximum: 9007199254740991
7434074347
minimum: 1
7434174348
type: integer
74349+
excludedIndexPatterns:
74350+
default: []
74351+
items:
74352+
type: string
74353+
type: array
7434274354
fieldHistoryLength:
7434374355
default: 10
7434474356
maximum: 9007199254740991
@@ -74360,6 +74372,10 @@ paths:
7436074372
maximum: 9007199254740991
7436174373
minimum: 1
7436274374
type: integer
74375+
maxTimeWindowSize:
74376+
default: 15m
74377+
pattern: '[smdh]$'
74378+
type: string
7436374379
responses:
7436474380
'200':
7436574381
content:
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"10.1.0": [
3+
{
4+
"historySnapshot": {
5+
"status": "stopped",
6+
"frequency": "1h"
7+
},
8+
"logsExtraction": {
9+
"additionalIndexPatterns": [],
10+
"fieldHistoryLength": 10,
11+
"lookbackPeriod": "24h",
12+
"delay": "2m",
13+
"docsLimit": 10000,
14+
"timeout": "30s",
15+
"frequency": "1h"
16+
}
17+
}
18+
],
19+
"10.2.0": [
20+
{
21+
"historySnapshot": {
22+
"status": "stopped",
23+
"frequency": "1h"
24+
},
25+
"logsExtraction": {
26+
"additionalIndexPatterns": [],
27+
"fieldHistoryLength": 10,
28+
"lookbackPeriod": "24h",
29+
"delay": "2m",
30+
"docsLimit": 10000,
31+
"timeout": "30s",
32+
"frequency": "1h",
33+
"excludedIndexPatterns": [],
34+
"maxTimeWindowSize": "15m"
35+
}
36+
}
37+
]
38+
}

src/core/server/integration_tests/ci_checks/saved_objects/check_registered_types.test.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ describe('checking migration metadata changes on all registered SO types', () =>
105105
"entity-engine-descriptor-v2": "44b60aa3d3d4583082b58500b297f2dddb4ffc14b77ad1d568b70536ca4ae787",
106106
"entity-engine-status": "005903620a00737932aa54ae57817b078810b2f71cc42e7715d1c22c5e5b715e",
107107
"entity-store-ccs-state": "79b9cdbb27444593a2c07a4384313ea37c8399604f016e9d633b71cb9c937489",
108-
"entity-store-global-state": "8581bc65d1b2bf6d0218b693509129a2515599aeff8933d85353a3fb28d52bda",
108+
"entity-store-global-state": "eb60227cc1e7be835ed87a912b8bb7ed5a05fa5bd9c48948496ca4d47fc5d102",
109109
"epm-packages": "46e4129dba3ac33d4924239672169f12ad75536e9f44f695964220a80ebfeaca",
110110
"epm-packages-assets": "1095b56fabdeb3994a60f4da02e87179dfaf57d5bb23b97458129bf14c66b46e",
111111
"event-annotation-group": "21141aa64bba4d05ee6ebe0b0d75475452bca50e73f902a38800457d0727014d",
@@ -666,6 +666,7 @@ describe('checking migration metadata changes on all registered SO types', () =>
666666
"entity-store-global-state|global: f625b80f055d7a5a819b3b312d6cf51a5e10b61f",
667667
"entity-store-global-state|mappings: e1b10e5bec060a176469a5e9a4f80c94e23abcd7",
668668
"entity-store-global-state|schemas: da39a3ee5e6b4b0d3255bfef95601890afd80709",
669+
"entity-store-global-state|10.2.0: 01078c5bd2f6664e89eab8e3e5e6de6ee23fc1fd9b00b88919ff568f4abd5c09",
669670
"entity-store-global-state|10.1.0: e142dccd899fda050613a1fc6414807296969934f97cbebe4a2b1dd02d20a4a0",
670671
"==================================================================================================",
671672
"epm-packages|global: 9d90d41b665a6b53aa6e984ad0e100ff733e05b9",
@@ -1480,7 +1481,7 @@ describe('checking migration metadata changes on all registered SO types', () =>
14801481
"entity-engine-descriptor-v2": "10.5.0",
14811482
"entity-engine-status": "10.2.0",
14821483
"entity-store-ccs-state": "10.1.0",
1483-
"entity-store-global-state": "10.1.0",
1484+
"entity-store-global-state": "10.2.0",
14841485
"epm-packages": "10.8.0",
14851486
"epm-packages-assets": "10.0.0",
14861487
"event-annotation-group": "10.0.0",
@@ -1648,7 +1649,7 @@ describe('checking migration metadata changes on all registered SO types', () =>
16481649
"entity-engine-descriptor-v2": "10.5.0",
16491650
"entity-engine-status": "10.2.0",
16501651
"entity-store-ccs-state": "10.1.0",
1651-
"entity-store-global-state": "10.1.0",
1652+
"entity-store-global-state": "10.2.0",
16521653
"epm-packages": "10.8.0",
16531654
"epm-packages-assets": "0.0.0",
16541655
"event-annotation-group": "0.0.0",

x-pack/solutions/security/plugins/entity_store/server/domain/logs_extraction/PAGINATION.md

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Logs Extraction Pagination
22

3-
Two nested loops process raw log documents into aggregated entity rows.
3+
Three nested loops process raw log documents into aggregated entity rows.
4+
5+
**Window cap outer loop**: When the gap between `fromDateISO` and the effective window end (`now - delay`) exceeds `maxTimeWindowSize + GRACE_PERIOD` (default `15m + 30s`), the run processes the time range as a sequence of capped `[fromSub, toSub]` sub-windows of width `maxTimeWindowSize`, advancing within a single execution until the effective end is reached. Sub-windows are an in-memory iteration concept — the saved-object schema is unaware of them. Crash recovery uses the per-slice persistence emitted by the inner outer-loop (last `paginationTimestamp` / `checkpointTimestamp` written). Manual `specificWindow` / `windowOverride` runs bypass capping and run as a single pass.
46

57
**Outer loop — log slices**: Each iteration runs a **boundary probe** (`buildLogPaginationCursorProbeEsql`) to locate the inclusive end of the next raw-log slice (up to `maxLogsPerPage` documents, sorted by `@timestamp ASC, _id ASC`). The probe returns `total_logs` (count before `LIMIT`) so the client knows when the window is exhausted.
68

@@ -12,7 +14,7 @@ Two nested loops process raw log documents into aggregated entity rows.
1214

1315
| Cursor | Persisted fields | Semantics |
1416
|--------|-----------------|-----------|
15-
| **Log slice start** | `logsPageCursorStartTimestamp/Id` | Exclusive compound lower bound `(@timestamp, _id)` for the next probe. Set to the previous slice end after completing all entity pages. |
17+
| **Log slice start** | `logsPageCursorStartTimestamp/Id` | Exclusive compound lower bound `(@timestamp, _id)` for the next probe. Set to the previous slice end after completing all entity pages. Doubles as the resume point on crash mid-run — no separate sub-window checkpoint is persisted. |
1618
| **Log slice end** | `logsPageCursorEndTimestamp/Id` | Inclusive upper bound for the current slice. Set by the probe; cleared when the slice is fully processed. |
1719
| **Entity cursor** | `paginationTimestamp/Id` | `(FirstSeenLogInPage, UntypedId)` of the last ingested entity page. Cleared when a slice finishes. |
1820

@@ -92,6 +94,39 @@ If the process crashes mid inner-loop, `paginationId` is set in the saved state.
9294

9395
---
9496

97+
## Lagging environment: multiple sub-windows in one run
98+
99+
When `effectiveWindowEnd - fromDateISO > maxTimeWindowSize + GRACE_PERIOD`, the time range is processed as a sequence of capped sub-windows within a single `extractLogs` run. Each sub-window runs the existing slice/entity loops to completion. Persistence between sub-windows is whatever the inner outer-loop already wrote (per-slice `paginationTimestamp`); no extra checkpoint round-trip is added.
100+
101+
```mermaid
102+
sequenceDiagram
103+
participant C as Client
104+
participant ES as Elasticsearch
105+
Note over C: fromDateISO=T0, effectiveEnd=T0+15m, cap=5m
106+
107+
rect rgb(240, 240, 240)
108+
Note over C: sub-window 1: [T0, T0+5m]
109+
C->>ES: probe → slice end, then extract + ingest entities
110+
Note over C: per-slice persistence: paginationTimestamp = lastSliceEnd_ts
111+
end
112+
113+
rect rgb(240, 240, 240)
114+
Note over C: sub-window 2: [T0+5m, T0+10m] (in-memory advance)
115+
C->>ES: probe → slice end, then extract + ingest entities
116+
end
117+
118+
rect rgb(240, 240, 240)
119+
Note over C: sub-window 3: [T0+10m, T0+15m] (effective end — not capped)
120+
C->>ES: probe → slice end, then extract + ingest entities
121+
end
122+
123+
Note over C: final cleanup: clear all cursors, set lastExecutionTimestamp = T0+15m
124+
```
125+
126+
If the process is aborted between sub-windows, recovery resumes from the last persisted slice end (`paginationTimestamp` set by the inner outer-loop after the most recently completed slice) — not from a sub-window boundary. The next run re-establishes its own sub-window cap from that resume point.
127+
128+
---
129+
95130
## Recovery
96131

97132
A crash mid-entity-page leaves the following state on disk:
@@ -126,10 +161,16 @@ sequenceDiagram
126161

127162
The entity-level pagination WHERE uses `> T_ent OR (= T_ent AND untypedId > E_ent)` — entities already ingested before the crash are skipped; the slice is re-established from `T_ent` inclusive.
128163

164+
A crash *between* sub-windows is indistinguishable from a crash at a slice boundary: the most recently persisted state is `paginationTimestamp = lastSliceEnd_ts` (from the inner outer-loop's per-slice `advanceEngineStateAfterLogPageCompletes`). The next run reads that as `fromDateISO` and re-establishes the sub-window cap from there — re-fetching the slice-boundary doc itself, which is harmless under the idempotent aggregations (`TOP`, `LAST`, `MIN`, `MV_UNION`).
165+
129166
---
130167

131168
## Edge cases
132169

170+
### Cap interaction with `specificWindow` / `windowOverride`
171+
172+
When a manual window is supplied (admin-triggered API call), the sub-window cap is bypassed and the supplied bounds are processed in a single pass via the existing slice/entity loops. State is not advanced — the user explicitly picked the bounds, and we do not silently shorten or shift them.
173+
133174
### Timestamp collision at a slice boundary
134175

135176
The compound cursor `(@timestamp = T AND _id > id)` is essential when multiple documents share the same millisecond timestamp. If the base time-window filter used `@timestamp > fromDateISO` (exclusive) and `fromDateISO == T`, all same-timestamp documents would be discarded before the compound filter could apply — permanently losing them.

x-pack/solutions/security/plugins/entity_store/server/domain/logs_extraction/ccs_logs_extraction_client.test.ts

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ describe('CcsLogsExtractionClient', () => {
7676
lookbackPeriod: '3h',
7777
delay: '1m',
7878
entityDefinition: getEntityDefinition('host', 'default'),
79+
// Use a very large cap so existing tests remain a single sub-window. The sub-window cap
80+
// behavior is exercised by the dedicated tests at the end of this describe block.
81+
maxTimeWindowSize: '999d',
7982
};
8083

8184
beforeEach(() => {
@@ -472,4 +475,102 @@ describe('CcsLogsExtractionClient', () => {
472475
expect(mockExecuteEsqlQuery).not.toHaveBeenCalled();
473476
expect(mockCcsStateClient.clearRecoveryId).not.toHaveBeenCalled();
474477
});
478+
479+
describe('sub-window cap', () => {
480+
it('walks the time window in capped sub-windows when checkpointTimestamp is far behind effectiveWindowEnd', async () => {
481+
// FIXED_NOW = 2026-01-01T12:00 ; delay = 1m → effectiveWindowEnd = 2026-01-01T11:59
482+
// checkpoint = 2026-01-01T11:29 → window ~30m, cap=5m, grace=30s → 6 sub-windows.
483+
const checkpoint = '2026-01-01T11:29:00.000Z';
484+
mockCcsStateClient.findOrInit.mockResolvedValue({
485+
checkpointTimestamp: checkpoint,
486+
paginationRecoveryId: null,
487+
});
488+
// Each sub-window probe returns empty (no logs), so the inner outer-loop terminates
489+
// immediately and never persists per-slice checkpoints. No state updates occur.
490+
mockExecuteEsqlQuery.mockResolvedValue(emptyProbeResponse);
491+
492+
const result = await client.extractToUpdates({
493+
...defaultExtractParams,
494+
maxTimeWindowSize: '5m',
495+
});
496+
497+
expect(result).toEqual({ count: 0, pages: 0 });
498+
// 6 sub-windows × 1 probe each.
499+
expect(mockExecuteEsqlQuery).toHaveBeenCalledTimes(6);
500+
// No per-sub-window checkpoint persistence — inner per-slice persistence is the only
501+
// mechanism, and it didn't fire because every probe was empty.
502+
expect(mockCcsStateClient.update).not.toHaveBeenCalled();
503+
// count=0 across all sub-windows → clearRecoveryId
504+
expect(mockCcsStateClient.clearRecoveryId).toHaveBeenCalledWith('host');
505+
});
506+
507+
it('does not cap when the gap is within maxTimeWindowSize + grace', async () => {
508+
// Window ~ 5m + 10s, cap = 5m, grace = 30s → no cap, single sub-window.
509+
const checkpoint = '2026-01-01T11:53:50.000Z';
510+
mockCcsStateClient.findOrInit.mockResolvedValue({
511+
checkpointTimestamp: checkpoint,
512+
paginationRecoveryId: null,
513+
});
514+
mockExecuteEsqlQuery.mockResolvedValueOnce(emptyProbeResponse);
515+
516+
await client.extractToUpdates({
517+
...defaultExtractParams,
518+
maxTimeWindowSize: '5m',
519+
});
520+
521+
expect(mockExecuteEsqlQuery).toHaveBeenCalledTimes(1);
522+
// Empty probe → no per-slice state updates either.
523+
expect(mockCcsStateClient.update).not.toHaveBeenCalled();
524+
});
525+
526+
it('bypasses the sub-window cap when windowOverride is provided', async () => {
527+
const overrideFrom = '2024-01-01T00:00:00.000Z';
528+
const overrideTo = '2024-12-31T23:59:00.000Z'; // ~1y, exceeds the 5m cap
529+
530+
mockExecuteEsqlQuery.mockResolvedValueOnce(emptyProbeResponse);
531+
532+
await client.extractToUpdates({
533+
...defaultExtractParams,
534+
maxTimeWindowSize: '5m',
535+
windowOverride: { fromDateISO: overrideFrom, toDateISO: overrideTo },
536+
});
537+
538+
// Single probe over the full user-supplied window — no sub-window splitting.
539+
expect(mockExecuteEsqlQuery).toHaveBeenCalledTimes(1);
540+
const probeQuery = mockExecuteEsqlQuery.mock.calls[0][0].query as string;
541+
expect(probeQuery).toContain(overrideFrom);
542+
expect(probeQuery).toContain(overrideTo);
543+
// Override runs do not touch CCS state.
544+
expect(mockCcsStateClient.findOrInit).not.toHaveBeenCalled();
545+
expect(mockCcsStateClient.update).not.toHaveBeenCalled();
546+
});
547+
548+
it('passes monotonically advancing fromDateISO/toDateISO to each sub-window probe', async () => {
549+
const checkpoint = '2026-01-01T11:44:00.000Z'; // 15m before effectiveWindowEnd
550+
mockCcsStateClient.findOrInit.mockResolvedValue({
551+
checkpointTimestamp: checkpoint,
552+
paginationRecoveryId: null,
553+
});
554+
mockExecuteEsqlQuery.mockResolvedValue(emptyProbeResponse);
555+
556+
await client.extractToUpdates({
557+
...defaultExtractParams,
558+
maxTimeWindowSize: '5m',
559+
});
560+
561+
expect(mockExecuteEsqlQuery).toHaveBeenCalledTimes(3);
562+
563+
const subWindow1 = mockExecuteEsqlQuery.mock.calls[0][0].query as string;
564+
expect(subWindow1).toContain('2026-01-01T11:44:00.000Z');
565+
expect(subWindow1).toContain('2026-01-01T11:49:00.000Z');
566+
567+
const subWindow2 = mockExecuteEsqlQuery.mock.calls[1][0].query as string;
568+
expect(subWindow2).toContain('2026-01-01T11:49:00.000Z');
569+
expect(subWindow2).toContain('2026-01-01T11:54:00.000Z');
570+
571+
const subWindow3 = mockExecuteEsqlQuery.mock.calls[2][0].query as string;
572+
expect(subWindow3).toContain('2026-01-01T11:54:00.000Z');
573+
expect(subWindow3).toContain('2026-01-01T11:59:00.000Z');
574+
});
575+
});
475576
});

0 commit comments

Comments
 (0)