pgayvallet
diff --git a/‎x-pack/platform/packages/shared/kbn-streams-ai/index.ts‎
Lines changed: 4 additions & 1 deletion b/‎x-pack/platform/packages/shared/kbn-streams-ai/index.ts‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎x-pack/platform/packages/shared/kbn-streams-ai/src/significant_events/generate_significant_events.ts‎
Lines changed: 38 additions & 0 deletions b/‎x-pack/platform/packages/shared/kbn-streams-ai/src/significant_events/generate_significant_events.ts‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎x-pack/platform/packages/shared/kbn-streams-ai/src/significant_events/prompt.ts‎
Lines changed: 6 additions & 0 deletions b/‎x-pack/platform/packages/shared/kbn-streams-ai/src/significant_events/prompt.ts‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎x-pack/platform/packages/shared/kbn-streams-ai/src/significant_events/system_prompt.text‎
Lines changed: 114 additions & 5 deletions b/‎x-pack/platform/packages/shared/kbn-streams-ai/src/significant_events/system_prompt.text‎
Lines changed: 114 additions & 5 deletions
diff --git a/‎x-pack/platform/packages/shared/kbn-streams-ai/src/significant_events/user_prompt.text‎
Lines changed: 5 additions & 0 deletions b/‎x-pack/platform/packages/shared/kbn-streams-ai/src/significant_events/user_prompt.text‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎x-pack/platform/packages/shared/kbn-streams-schema/index.ts‎
Lines changed: 2 additions & 0 deletions b/‎x-pack/platform/packages/shared/kbn-streams-schema/index.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎x-pack/platform/packages/shared/kbn-streams-schema/src/api/significant_events/index.ts‎
Lines changed: 1 addition & 0 deletions b/‎x-pack/platform/packages/shared/kbn-streams-schema/src/api/significant_events/index.ts‎
Lines changed: 1 addition & 0 deletions
@@ -19,7 +19,10 @@ export {
   type SuggestProcessingPipelineResult,
   type SuggestPipelineAgentSchema,
 } from './workflows/suggest_processing_pipeline';
-export { generateSignificantEvents } from './src/significant_events/generate_significant_events';
+export {
+  generateSignificantEvents,
+  type ExistingQuerySummary,
+} from './src/significant_events/generate_significant_events';
 export {
   createDefaultSignificantEventsToolUsage,
   type SignificantEventsToolUsage,
 
@@ -13,6 +13,7 @@ import {
   ensureMetadata,
   getSourcesForStream,
   getStatsQueryHints,
+  normalizeEsqlSafe,
   replaceFromSources,
 } from '@kbn/streams-schema';
 import type { ElasticsearchClient, Logger } from '@kbn/core/server';
@@ -40,6 +41,17 @@ import {
   type SignificantEventsToolUsage,
 } from './tools/tool_usage';
 
+const MAX_EXISTING_QUERIES_FOR_CONTEXT = 50; // Cap on how many existing queries are serialized into the prompt context (highest severity first); duplicate detection still considers all of them.
+
+export interface ExistingQuerySummary { // Summary of a query already stored for the stream; passed in via `existingQueries` and serialized as JSON into the prompt's `existing_queries` input.
+  id: string; // Identifier the model is asked to echo back in `replaces` when it supersedes this query.
+  title: string;
+  type: string; // NOTE(review): presumably "match" or "stats" like generated queries — confirm against callers.
+  severity_score?: number; // Optional; treated as 0 when ordering queries for the prompt context.
+  description: string;
+  esql: string; // Normalized with `normalizeEsqlSafe` and compared against generated queries to reject exact duplicates.
+}
+
 /**
  * Intermediate representation of a query as produced by the LLM tool output.
  * Uses a flat `esql` string (vs the wrapped `EsqlQuery` in the wire type)
@@ -53,6 +65,7 @@ interface ParsedToolQuery {
   category: SignificantEventType;
   severity_score: number;
   evidence?: string[];
+  replaces?: string;
 }
 
 function getErrorMessage(error: unknown): string {
@@ -73,6 +86,7 @@ export async function generateSignificantEvents({
   logger,
   additionalTools,
   additionalToolCallbacks,
+  existingQueries,
 }: {
   stream: Streams.all.Definition;
   esClient: ElasticsearchClient;
@@ -87,6 +101,7 @@ export async function generateSignificantEvents({
   systemPrompt: string;
   additionalTools?: Record<string, ToolDefinition>;
   additionalToolCallbacks?: Record<string, ToolCallback>;
+  existingQueries?: ExistingQuerySummary[];
 }): Promise<{
   queries: ParsedToolQuery[];
   tokensUsed: ChatCompletionTokenCount;
@@ -99,6 +114,18 @@ export async function generateSignificantEvents({
   const prompt = createGenerateSignificantEventsPrompt({ systemPrompt, additionalTools });
   const targetSources = getSourcesForStream(stream);
 
+  const existingQueriesList = existingQueries ?? [];
+
+  const normalizedStoredEsqls = new Set(existingQueriesList.map((q) => normalizeEsqlSafe(q.esql)));
+
+  const existingQueriesContext = existingQueriesList.length
+    ? JSON.stringify(
+        [...existingQueriesList]
+          .sort((a, b) => (b.severity_score ?? 0) - (a.severity_score ?? 0))
+          .slice(0, MAX_EXISTING_QUERIES_FOR_CONTEXT)
+      )
+    : '';
+
   logger.trace('Generating significant events via reasoning agent');
   const response = await withSpan('generate_significant_events', () =>
     executeAsReasoningAgent({
@@ -107,6 +134,7 @@ export async function generateSignificantEvents({
         description: stream.description,
         available_feature_types: SIGNIFICANT_EVENTS_FEATURE_TOOL_TYPES.join(', '),
         computed_feature_instructions: getComputedFeatureInstructions(),
+        existing_queries: existingQueriesContext,
       },
       maxSteps: additionalToolCallbacks ? 6 : 4,
       prompt,
@@ -185,6 +213,16 @@ export async function generateSignificantEvents({
                     ? sourceRewritten
                     : ensureMetadata(sourceRewritten);
 
+                if (normalizedStoredEsqls.has(normalizeEsqlSafe(rewritten))) {
+                  return {
+                    query: { ...query, type: derivedType, esql: rewritten },
+                    valid: false,
+                    status: 'Duplicate',
+                    error: 'This query already exists for this stream.',
+                    hints: undefined,
+                  };
+                }
+
                 const hints = getStatsQueryHints(rewritten);
 
                 await esClient.esql.query({
 
@@ -34,6 +34,7 @@ export function createGenerateSignificantEventsPrompt({
       description: z.string(),
       available_feature_types: z.string(),
       computed_feature_instructions: z.string(),
+      existing_queries: z.string(),
     }),
   })
     .version({
@@ -121,6 +122,11 @@ export function createGenerateSignificantEventsPrompt({
                         type: 'string',
                       },
                     },
+                    replaces: {
+                      type: 'string',
+                      description:
+                        'If this query replaces an existing one (same detection intent but updated ES|QL), set this to the ID of the existing query from `existing_queries`.',
+                    },
                   },
                   required: ['esql', 'title', 'description', 'category', 'severity_score'],
                 },
 
@@ -121,12 +121,121 @@ Schema features indicate the **log schema family** (ecs, otel, or custom) detect
 | Tool | Function | Notes |
 | :--- | :--- | :--- |
 | `get_stream_features` | Fetches stream features for this stream. | **MUST be called first before any `add_queries` call.** Call without filters to get all features at once. For incremental retrieval, prefer using `min_confidence` + `limit`; optionally pass `feature_types` to narrow by type. Results include computed features (dataset_analysis, log_samples, log_patterns, error_logs) and inferred features. Supported values: `{{{available_feature_types}}}`. |
-| `add_queries` | Submits one or more ES|QL queries for the user. | Payload is a list of objects, each with `title`, `description`, `esql`, `category`, `severity_score`, optional `type` ("match" or "stats"), and optional `evidence`. |
+| `add_queries` | Submits one or more ES|QL queries for the user. | Payload is a list of objects, each with `title`, `description`, `esql`, `category`, `severity_score`, optional `type` ("match" or "stats"), optional `evidence`, and optional `replaces` (ID of an existing query this one supersedes — see Existing Queries section). |
 | `reason()` | **Begin a Reasoning Monologue** | Outputs your private thoughts. Must use sentinel tags (`<<<BEGIN_INTERNAL>>>`...`<<<END_INTERNAL>>>`). |
 | `complete()` | Declare readiness to answer | Ends the loop and triggers the **Definitive Output**. |
 
 ---
 
+## Existing Queries
+
+If `existing_queries` is provided in the input, it lists queries already
+created for this stream in previous generation runs. Each entry includes an
+`id`, `title`, `type`, `description`, and `esql`, plus a `severity_score`
+when one is available.
+
+You MUST evaluate every existing query against the current stream features
+before generating any new queries. For each existing query, determine which
+of these three cases applies:
+
+### Case 1: Query is still valid — SKIP
+
+The query's detection intent is still relevant AND its ES|QL references fields
+that are still present and meaningful in the current `dataset_analysis`.
+
+Apply this case whenever your candidate ES|QL is **functionally equivalent**
+to an existing query (matches the same documents), not just when it is
+textually identical. See "Rules" below for the equivalence patterns you must
+recognize.
+
+**Action:** Do NOT re-emit this query. Do not include it in any `add_queries`
+call. It is already stored and working correctly.
+
+### Case 2: Query needs updated ES|QL — REPLACE
+
+The query's detection intent is still relevant, but the underlying stream
+fields have changed since the query was last generated. Examples:
+
+- A field the query references has been removed, renamed, or is no longer
+  populated
+- The field value distribution has changed (e.g., `log.level` was multi-valued
+  but is now uniform — the query needs an alternate error signal)
+- A better or more specific field has become available for the same detection
+  goal (e.g., `error.type` appeared where only message patterns existed before)
+- The query's thresholds or structure no longer match the current data patterns
+
+**Action:** Re-emit the query in `add_queries` with:
+- Updated ES|QL reflecting the current field landscape
+- `replaces` set to the `id` of the existing query it supersedes
+- The same title (or improved), updated description if needed
+- Recalibrated `severity_score` if the signal strength changed
+
+### Case 3: Novel detection — NEW QUERY
+
+The stream features reveal a detection opportunity not covered by any existing
+query. This is a genuinely new signal dimension.
+
+**Action:** Emit the query in `add_queries` normally, without setting
+`replaces`. Focus on dimensions not already covered by existing queries.
+
+### Rules
+
+- **Functional equivalence counts as Case 1 (SKIP).** Two queries are
+  functionally equivalent when they match the same documents, regardless of
+  syntax. Treat the following as the same query and do not re-emit:
+  - `f:"x" AND f:"y"` vs `f:"y" AND f:"x"` (operand order in AND/OR)
+  - `f IN ("a", "b")` vs `f == "a" OR f == "b"` (IN vs equality OR chain)
+  - Same predicate with a redundant filter added or removed (e.g., scoping to
+    `service.name == "X"` when the stream only contains service "X")
+  - Literal-formatting differences that don't change the comparison
+    (`error_rate > 5` vs `error_rate > 5.0`, `duration > 1000` vs
+    `duration > 1_000`)
+  - **Exception — boundary changes are NOT equivalent:** `> 50` and `>= 50`
+    match different documents at the boundary value, so do **not** treat them
+    as the same query. The same applies to `<` vs `<=` and to threshold value
+    changes (e.g., `> 50` vs `> 100`) — these are Case 2 (REPLACE) or Case 3
+    (NEW) candidates, never Case 1 (SKIP).
+- **Strict subset/superset is also a SKIP.** If your candidate query adds a
+  filter to an existing query (or removes one), it matches a subset/superset
+  of the existing documents — not a new signal. In particular,
+  `MATCH_PHRASE(f, "a b c")` is a strict subset of `f:"a" AND f:"b" AND f:"c"`
+  (the phrase requires adjacency and order; the AND chain does not). Pick one
+  form per concept and do not emit both.
+- **Prefer SKIP over REPLACE when uncertain.** Use `replaces` only when you
+  are confident the new query preserves the same detection intent AND the
+  existing ES|QL is clearly outdated. When unsure, skip (Case 1) rather than
+  emit a near-duplicate.
+- Never set `replaces` on a genuinely new query.
+- Never emit two queries with `replaces` pointing to the same existing query
+  ID — pick the best replacement and skip the other.
+- **Intra-batch dedup is your responsibility.** Do not emit two queries in the
+  same `add_queries` call that are functionally equivalent to each other. The
+  automatic duplicate check only rejects exact ES|QL matches after
+  normalization; near-duplicates from the patterns above slip through.
+
+### Reasoning checklist
+
+During your Reasoning Monologue, after retrieving features with
+`get_stream_features`, explicitly evaluate the existing queries:
+
+```
+<<<BEGIN_INTERNAL>>>
+EXISTING QUERY EVALUATION:
+- "High Error Rate" (id: abc-123): ES|QL uses log.level IN ("ERROR", ...).
+  dataset_analysis shows log.level is present with 3% error distribution.
+  Fields still valid → SKIP.
+- "Database Timeout Detection" (id: def-456): ES|QL uses body.text:"timeout"
+  scoped to service.name == "old-db-service". dataset_analysis shows
+  service.name no longer contains "old-db-service"; replaced by "new-db".
+  Intent valid, field changed → REPLACE with updated entity scope.
+- No existing query covers auth failure patterns, but event.outcome is present
+  with 10% failure rate → NEW QUERY opportunity.
+<<<END_INTERNAL>>>
+```
+
+If `existing_queries` is empty or absent, skip this evaluation entirely.
+
+---
+
 ## 2. Core Loop — Act/Gather ➜ **Reason** ➜ Decide (continue or complete)
 
 **Mandatory first step:** Your very first action must be to call `get_stream_features` (without filters) to retrieve all available features. Do not call `add_queries` until you have reviewed the feature results and grounded your queries in them.
@@ -407,7 +516,7 @@ Alternatively, you can use the function form `MATCH(field, "query")` which is eq
 - **All terms required (3+ terms):** `MATCH(body.text, "connection timeout error", {"operator": "AND"})` — cleaner for many terms.
 - **Exact phrase (word order matters):** `MATCH_PHRASE(body.text, "connection timeout")` — use only for well-known phrases where word order and adjacency are semantically important (e.g., `"Failed password for"`, `"Out of memory"`, `"Started Application in"`).
 
-**Default to separate `:` terms with `AND`** for multi-term queries. Reserve `MATCH_PHRASE` for known phrases where order matters.
+**Default to separate `:` terms with `AND`** for multi-term queries. Reserve `MATCH_PHRASE` for known phrases where word order is semantic (e.g., `"Failed password for"`, `"Out of memory"`). **Pick ONE form per concept** — never emit both the `AND`-of-`:` variant and the `MATCH_PHRASE` variant as separate queries. The `AND` chain is strictly broader than the phrase (it does not require adjacency or word order), so emitting both creates a subset/superset redundancy for the same signal — pick the form that matches the detection intent and skip the other.
 
 **On keyword fields**, `:` performs exact matching (the field is not analyzed):
 - `log.level:"ERROR"` → exact match on keyword field
@@ -653,10 +762,10 @@ FROM <stream>
 ### Guardrails
 
 1. **Precision first:** Higher threshold is safer. Prefer zero firings over false positives.
-2. **Signal diversity:** Each STATS query covers a distinct failure dimension (error rate, latency, throughput, auth, cardinality). Consolidate overlapping signals.
+2. **Signal diversity:** Emit **at most one STATS query per distinct failure dimension** (error rate, latency, throughput, auth, cardinality). If multiple candidates target the same dimension, pick the strongest and skip the others — do not emit variants that differ only in threshold, entity scope, or aggregation shape. The only exception is the single optional per-entity variant allowed by Guardrail 6.
 3. **Bucket sizing:** Default 5 min. Low traffic (<100/min): 10–15 min. High traffic (>1000/min): 1–2 min.
 4. **Descriptions must answer:** (1) what user-visible problem, (2) what action, (3) threshold reasoning vs baseline, (4) thresholds may need adjustment, (5) include a "can indicate..." clause naming likely root causes — investigation agents use this as the hypothesis claim.
 5. **Severity:** Weigh relative deviation AND absolute impact. 2× from 0.1%→0.2% is low (~40). 2× from 20%→40% is critical (~85).
-6. **Complementarity:** For each important signal, generate a **detection + evidence pair**: a STATS query (BY bucket) for aggregate detection and a match query for evidence retrieval. The STATS query gets higher severity (tested first by investigators). Optionally add a per-entity STATS variant (BY <entity_field>, bucket) when entity cardinality is 2–50 and isolation adds diagnostic value — this counts as a separate dimension toward the >5 cap. Do not generate entity variants for single-entity streams.
+6. **Complementarity:** For each important signal, emit **at most three queries**: one STATS query (BY bucket) for aggregate detection, one match query for evidence retrieval, and optionally one per-entity STATS variant (BY <entity_field>, bucket) when entity cardinality is 2–50 and isolation adds diagnostic value. Pick a single entity field — never emit multiple per-entity variants for the same signal. The STATS query gets higher severity (tested first by investigators). Do not generate entity variants for single-entity streams.
 7. **Forbidden:** No `CATEGORIZE`, `CHANGE_POINT`, nested `STATS`, `SORT`/`LIMIT`/`KEEP`, `EVAL` outside STATS queries, high-cardinality BY (>50 distinct).
-8. **Target:** Generate as many as the data justifies — zero is fine if no pattern has field evidence. If >5, verify each adds a distinct dimension. Quality over quantity.
+8. **Target:** Generate as many as the data justifies — **do not pad**. Zero is fine if no pattern has field evidence. Every query must cite a concrete field, value distribution, log pattern, or feature from `dataset_analysis`/`get_stream_features`. Stop as soon as every remaining candidate would restate evidence you've already used or would be functionally equivalent to another emitted query (see the Rules in "Existing Queries"). The total count is bounded by Guardrails 2 and 6 (one per dimension, at most three per signal) — there is no fixed quota. Quality over quantity.
@@ -3,3 +3,8 @@
 
 `description`:
 {{{description}}}
+
+{{#existing_queries}}
+`existing_queries`:
+{{{existing_queries}}}
+{{/existing_queries}}
@@ -74,6 +74,8 @@ export {
   hasStatsCommand,
   MS_PER_UNIT,
   normalizeEsqlQuery,
+  normalizeEsqlSafe,
+  hasSameEsql,
   replaceFromSources,
   rewriteFromSources,
 } from './src/helpers/esql_helpers';
 
@@ -82,6 +82,7 @@ interface GeneratedSignificantEventQuery {
   severity_score: number;
   evidence?: string[];
   description: string;
+  replaces?: string;
 }
 
 type SignificantEventsGenerateResponse = Observable<
Original file line number	Diff line number	Diff line change
`@@ -82,6 +82,7 @@ interface GeneratedSignificantEventQuery {`
`82`	`82`	`severity_score: number;`
`83`	`83`	`evidence?: string[];`
`84`	`84`	`description: string;`
	`85`	`+ replaces?: string;`
`85`	`86`	`}`
`86`	`87`
`87`	`88`	`type SignificantEventsGenerateResponse = Observable<`