-
Notifications
You must be signed in to change notification settings - Fork 8.6k
[ResponseOps] Clear flapping when rules are enabled or bulk enabled #235024
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d006011
5f3b0b7
590b71e
6f02528
fdee114
0522797
509f334
45df9bc
076aa9c
211c0a8
90bae68
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,183 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0; you may not use this file except in compliance with the Elastic License | ||
| * 2.0. | ||
| */ | ||
|
|
||
| import type { ElasticsearchClientMock } from '@kbn/core/server/mocks'; | ||
| import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks'; | ||
| import { clearAlertFlappingHistory } from './clear_alert_flapping_history'; | ||
| import { ALERT_UUID } from '@kbn/rule-data-utils'; | ||
|
|
||
| let clusterClient: ElasticsearchClientMock; | ||
| let logger: ReturnType<(typeof loggingSystemMock)['createLogger']>; | ||
|
|
||
| describe('clearAlertFlappingHistory()', () => { | ||
| beforeEach(() => { | ||
| logger = loggingSystemMock.createLogger(); | ||
| clusterClient = elasticsearchServiceMock.createClusterClient().asInternalUser; | ||
| clusterClient.updateByQuery.mockResponse({ | ||
| total: 1, | ||
| updated: 1, | ||
| }); | ||
|
|
||
| clusterClient.search.mockResponse({ | ||
| took: 1, | ||
| timed_out: false, | ||
| _shards: { | ||
| total: 1, | ||
| successful: 1, | ||
| skipped: 0, | ||
| failed: 0, | ||
| }, | ||
| hits: { | ||
| hits: [ | ||
| { | ||
| _index: 'test-index', | ||
| _source: { | ||
| [ALERT_UUID]: 'test-alert-id', | ||
| }, | ||
| }, | ||
| ], | ||
| }, | ||
| }); | ||
| }); | ||
|
|
||
| afterEach(() => { | ||
| jest.resetAllMocks(); | ||
| }); | ||
|
|
||
| test('should call updateByQuery with provided rule id and index', async () => { | ||
| await clearAlertFlappingHistory({ | ||
| logger, | ||
| esClient: clusterClient, | ||
| indices: ['test-index'], | ||
| ruleIds: ['test-rule'], | ||
| }); | ||
|
|
||
| expect(clusterClient.updateByQuery).toHaveBeenCalledTimes(1); | ||
| expect(clusterClient.updateByQuery.mock.lastCall).toMatchInlineSnapshot(` | ||
| Array [ | ||
| Object { | ||
| "allow_no_indices": true, | ||
| "conflicts": "proceed", | ||
| "index": Array [ | ||
| "test-index", | ||
| ], | ||
| "query": Object { | ||
| "bool": Object { | ||
| "must": Array [ | ||
| Object { | ||
| "bool": Object { | ||
| "should": Array [ | ||
| Object { | ||
| "term": Object { | ||
| "kibana.alert.status": Object { | ||
| "value": "active", | ||
| }, | ||
| }, | ||
| }, | ||
| Object { | ||
| "term": Object { | ||
| "kibana.alert.status": Object { | ||
| "value": "recovered", | ||
| }, | ||
| }, | ||
| }, | ||
| ], | ||
| }, | ||
| }, | ||
| Object { | ||
| "bool": Object { | ||
| "should": Array [ | ||
| Object { | ||
| "term": Object { | ||
| "kibana.alert.rule.uuid": Object { | ||
| "value": "test-rule", | ||
| }, | ||
| }, | ||
| }, | ||
| ], | ||
| }, | ||
| }, | ||
| ], | ||
| }, | ||
| }, | ||
| "refresh": true, | ||
| "script": Object { | ||
| "lang": "painless", | ||
| "source": " | ||
| ctx._source['kibana.alert.flapping'] = false; | ||
| ctx._source['kibana.alert.flapping_history'] = new ArrayList(); | ||
| ", | ||
| }, | ||
| }, | ||
| ] | ||
| `); | ||
| }); | ||
|
|
||
| test('should throw if either indices or ruleIds is empty', async () => { | ||
| await expect( | ||
| clearAlertFlappingHistory({ | ||
| logger, | ||
| esClient: clusterClient, | ||
| indices: [], | ||
| ruleIds: ['test-rule'], | ||
| }) | ||
| ).rejects.toThrow('Rule Ids and indices must be provided'); | ||
|
|
||
| await expect( | ||
| clearAlertFlappingHistory({ | ||
| logger, | ||
| esClient: clusterClient, | ||
| indices: ['test-index'], | ||
| ruleIds: [], | ||
| }) | ||
| ).rejects.toThrow('Rule Ids and indices must be provided'); | ||
|
|
||
| expect(clusterClient.updateByQuery).toHaveBeenCalledTimes(0); | ||
| }); | ||
|
|
||
| test('should throw if could not clear flapping history', async () => { | ||
| clusterClient.updateByQuery.mockRejectedValue(Error('something went wrong!')); | ||
| await expect( | ||
| clearAlertFlappingHistory({ | ||
| logger, | ||
| esClient: clusterClient, | ||
| indices: ['test-index'], | ||
| ruleIds: ['test-rule'], | ||
| }) | ||
| ).rejects.toThrow('something went wrong!'); | ||
|
|
||
| expect(clusterClient.updateByQuery).toHaveBeenCalledTimes(1); | ||
|
|
||
| expect(logger.error).toHaveBeenCalledWith( | ||
| 'Error clearing alert flapping for indices: test-index, ruleIds: test-rule - something went wrong!' | ||
| ); | ||
| }); | ||
|
|
||
| test('should retry updateByQuery if it does not finish in 1 attempt', async () => { | ||
| clusterClient.updateByQuery.mockResolvedValueOnce({ | ||
| total: 3, | ||
| updated: 1, | ||
| }); | ||
| clusterClient.updateByQuery.mockResolvedValueOnce({ | ||
| total: 3, | ||
| updated: 2, | ||
| }); | ||
| clusterClient.updateByQuery.mockResolvedValueOnce({ | ||
| total: 3, | ||
| updated: 3, | ||
| }); | ||
|
|
||
| await clearAlertFlappingHistory({ | ||
| logger, | ||
| esClient: clusterClient, | ||
| indices: ['test-index'], | ||
| ruleIds: ['test-rule'], | ||
| }); | ||
|
|
||
| expect(clusterClient.updateByQuery).toHaveBeenCalledTimes(3); | ||
| }); | ||
| }); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,123 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0; you may not use this file except in compliance with the Elastic License | ||
| * 2.0. | ||
| */ | ||
|
|
||
| import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server'; | ||
| import type { Logger } from '@kbn/logging'; | ||
| import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types'; | ||
| import { | ||
| ALERT_RULE_UUID, | ||
| ALERT_STATUS, | ||
| ALERT_STATUS_ACTIVE, | ||
| ALERT_STATUS_RECOVERED, | ||
| ALERT_FLAPPING, | ||
| ALERT_FLAPPING_HISTORY, | ||
| } from '@kbn/rule-data-utils'; | ||
|
|
||
| export interface ClearAlertFlappingHistoryParams { | ||
| indices: string[]; | ||
| ruleIds: string[]; | ||
| } | ||
|
|
||
| interface ClearAlertFlappingHistoryParamsWithDeps extends ClearAlertFlappingHistoryParams { | ||
| logger: Logger; | ||
| esClient: ElasticsearchClient; | ||
| } | ||
|
|
||
| const clearAlertFlappingHistoryQuery = (ruleIds: string[]): QueryDslQueryContainer => { | ||
| const shouldStatusTerms = [ALERT_STATUS_ACTIVE, ALERT_STATUS_RECOVERED].map((status) => { | ||
| return { | ||
| term: { | ||
| [ALERT_STATUS]: { value: status }, | ||
| }, | ||
| }; | ||
| }); | ||
|
|
||
| const shouldRuleIdsTerms = ruleIds.map((ruleId) => { | ||
| return { | ||
| term: { | ||
| [ALERT_RULE_UUID]: { value: ruleId }, | ||
| }, | ||
| }; | ||
| }); | ||
|
|
||
| return { | ||
| bool: { | ||
| must: [ | ||
| { | ||
| bool: { | ||
| should: shouldStatusTerms, | ||
| }, | ||
| }, | ||
| { | ||
| bool: { | ||
| should: shouldRuleIdsTerms, | ||
| }, | ||
| }, | ||
| ], | ||
| }, | ||
| }; | ||
| }; | ||
|
|
||
| export const clearAlertFlappingHistory = async ( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't we call
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be ok since the bulk_enable code path already checks this, also I was following the pattern for untracking and they don't do call ensureAuthorized either. But I can change this if others deem necessary |
||
| params: ClearAlertFlappingHistoryParamsWithDeps | ||
| ) => { | ||
| const { indices, ruleIds, esClient, logger } = params; | ||
|
|
||
| if (!ruleIds.length || !indices.length) { | ||
| throw new Error('Rule Ids and indices must be provided'); | ||
| } | ||
|
|
||
| try { | ||
| let total = 0; | ||
|
|
||
| for (let retryCount = 0; retryCount < 3; retryCount++) { | ||
| const response = await esClient.updateByQuery({ | ||
| index: indices, | ||
| allow_no_indices: true, | ||
| conflicts: 'proceed', | ||
| script: { | ||
| source: ` | ||
| ctx._source['${ALERT_FLAPPING}'] = false; | ||
| ctx._source['${ALERT_FLAPPING_HISTORY}'] = new ArrayList(); | ||
| `, | ||
| lang: 'painless', | ||
| }, | ||
| query: clearAlertFlappingHistoryQuery(ruleIds), | ||
| refresh: true, | ||
| }); | ||
|
|
||
| if (total === 0 && response.total === 0) { | ||
| logger.debug('No active or recovered alerts matched the query'); | ||
| break; | ||
| } | ||
|
|
||
| if (response.total) { | ||
| total = response.total; | ||
| } | ||
|
|
||
| if (response.total === response.updated) { | ||
| break; | ||
| } | ||
|
|
||
| logger.warn( | ||
| `Attempt ${retryCount + 1}: Failed to clear flapping ${ | ||
| (response.total ?? 0) - (response.updated ?? 0) | ||
| } of ${response.total}; indices: ${indices}, ruleIds: ${ruleIds} | ||
| }` | ||
| ); | ||
| } | ||
|
|
||
| if (total === 0) { | ||
| return []; | ||
| } | ||
| } catch (err) { | ||
| logger.error( | ||
| `Error clearing alert flapping for indices: ${indices}, ruleIds: ${ruleIds} - ${err.message}` | ||
| ); | ||
| throw err; | ||
| } | ||
| }; | ||
Uh oh!
There was an error while loading. Please reload this page.