@@ -351,59 +351,185 @@ Resources:
351351 MetricNamespace : !Sub "${AWS::StackName}/LogMessages"
352352 MetricName : " OTGFunction-Fatalerror"
353353
# Diff hunk: SessionLambdaFailedToVerifyJWTAlarm is renamed to
# SessionLambdaFailedToVerifyJWTWarningAlarm and reworked.
#   before ("-" lines): alarmed on the raw Sum of jwt_verification_failed
#     (Threshold 1, Period 300) and notified platform-alarm-critical-alert-topic.
#   after  ("+" lines): alarms on an error-rate percentage computed with metric
#     math (Threshold 10), notifies platform-alarm-warning-alert-topic, and is
#     only created when the DeployAlarms condition holds.
# DatapointsToAlarm / EvaluationPeriods (3 of 3) and TreatMissingData
# (notBreaching) are unchanged context lines.
354- SessionLambdaFailedToVerifyJWTAlarm :
354+ SessionLambdaFailedToVerifyJWTWarningAlarm :
355355 Type : AWS::CloudWatch::Alarm
356+ Condition : DeployAlarms
356357 Properties :
357358 AlarmDescription : !Sub
358- - " Errors verifying JWTs that have been been received by the session lambda . Runbook: ${SupportManualURL}"
359+ - " Errors verifying JWTs (jwt_verification_failed) rate exceeds 10% of Session Lambda invocations consecutively for 3, 5 minute periods . Runbook: ${SupportManualURL}"
359360 - SupportManualURL : !FindInMap [StaticVariables, Urls, SupportManualURL]
360- ActionsEnabled : true
361- AlarmActions :
362- # - !ImportValue core-infrastructure-AlarmTopic # OJ-3243: turning off pager duty notifications while we are seeing false positives
363- - !ImportValue platform-alarm-critical-alert-topic
364- OKActions :
365- # - !ImportValue core-infrastructure-AlarmTopic # OJ-3243: turning off pager duty notifications while we are seeing false positives
366- - !ImportValue platform-alarm-critical-alert-topic
367- InsufficientDataActions : []
368- MetricName : jwt_verification_failed
369- Namespace : !Sub "${CriIdentifier}"
370- Statistic : Sum
371- Dimensions :
372- - Name : service
373- Value : !Sub "${CriIdentifier}-sessionTS"
374- Period : 300
# Threshold is now compared against the percentage returned by the "errors"
# expression below, not against a raw count.
361+ ComparisonOperator : GreaterThanThreshold
362+ Threshold : 10
375363 DatapointsToAlarm : 3
376364 EvaluationPeriods : 3
377- Threshold : 1
378- ComparisonOperator : GreaterThanThreshold
379365 TreatMissingData : notBreaching
366+ AlarmActions :
367+ - !ImportValue platform-alarm-warning-alert-topic
368+ OKActions :
369+ - !ImportValue platform-alarm-warning-alert-topic
370+ Metrics :
# "errors" is the only entry with ReturnData true, so it is the series the alarm
# evaluates: (m1 / m2) * 100, or 0 when m2 is 0 to avoid dividing by zero.
371+ - Id : errors
372+ Expression : IF(m2 != 0, (m1 / m2) * 100, 0)
373+ Label : JWTErrorRate
374+ ReturnData : true
# m1: Sum of jwt_verification_failed per 300-second period for the
# "${CriIdentifier}-sessionTS" service dimension.
375+ - Id : m1
376+ ReturnData : false
377+ MetricStat :
378+ Metric :
379+ Namespace : !Sub "${CriIdentifier}"
380+ MetricName : jwt_verification_failed
381+ Dimensions :
382+ - Name : service
383+ Value : !Sub "${CriIdentifier}-sessionTS"
384+ Period : 300
385+ Stat : Sum
# m2: Sum of AWS/Lambda Invocations per 300-second period.
# NOTE(review): FunctionName must match the deployed session Lambda's name;
# the function definition is not visible in this hunk - confirm.
386+ - Id : m2
387+ ReturnData : false
388+ MetricStat :
389+ Metric :
390+ Namespace : AWS/Lambda
391+ MetricName : Invocations
392+ Dimensions :
393+ - Name : FunctionName
394+ Value : !Sub ${CommonStackName}-SessionFunctionTS
395+ Period : 300
396+ Stat : Sum
380397
# Diff hunk: adds SessionLambdaFailedToVerifyJWTCriticalAlarm. The diff aligns
# it against the removed TokenLambdaFailedToVerifyJWTAlarm, so the "-" lines
# here belong to that old token alarm, not to a previous session alarm.
# The new alarm uses the same error-rate expression and the same m1/m2 inputs
# as the session warning alarm, but with Threshold 80 and critical topics.
381- TokenLambdaFailedToVerifyJWTAlarm :
398+ SessionLambdaFailedToVerifyJWTCriticalAlarm :
382399 Type : AWS::CloudWatch::Alarm
400+ Condition : DeployAlarms
383401 Properties :
384402 AlarmDescription : !Sub
385- - " Errors verifying JWTs that have been been received by the token lambda . Runbook: ${SupportManualURL}"
403+ - " Errors verifying JWTs (jwt_verification_failed) rate exceeds 80% of Session Lambda invocations consecutively for 3, 5 minute periods . Runbook: ${SupportManualURL}"
386404 - SupportManualURL : !FindInMap [StaticVariables, Urls, SupportManualURL]
387- ActionsEnabled : true
405+ ComparisonOperator : GreaterThanThreshold
406+ Threshold : 80
407+ DatapointsToAlarm : 3
408+ EvaluationPeriods : 3
409+ TreatMissingData : notBreaching
# core-infrastructure-AlarmTopic was commented out in the old alarm (OJ-3243,
# pager duty false positives); the "+" lines add it back as a live action for
# both ALARM and OK transitions.
388410 AlarmActions :
389- # - !ImportValue core-infrastructure-AlarmTopic # OJ-3243: turning off pager duty notifications while we are seeing false positives
411+ - !ImportValue core-infrastructure-AlarmTopic
390412 - !ImportValue platform-alarm-critical-alert-topic
391413 OKActions :
392- # - !ImportValue core-infrastructure-AlarmTopic # OJ-3243: turning off pager duty notifications while we are seeing false positives
414+ - !ImportValue core-infrastructure-AlarmTopic
393415 - !ImportValue platform-alarm-critical-alert-topic
394- InsufficientDataActions : []
395- MetricName : jwt_verification_failed
396- Namespace : !Sub "${CriIdentifier}"
397- Statistic : Sum
398- Dimensions :
399- - Name : service
400- Value : !Sub "${CriIdentifier}-access-token-2"
401- Period : 300
416+ Metrics :
# "errors" (ReturnData true) is the evaluated series: failed-JWT count as a
# percentage of invocations, forced to 0 when there were no invocations.
417+ - Id : errors
418+ Expression : IF(m2 != 0, (m1 / m2) * 100, 0)
419+ Label : JWTErrorRate
420+ ReturnData : true
# m1: Sum of jwt_verification_failed for the "${CriIdentifier}-sessionTS" service.
421+ - Id : m1
422+ ReturnData : false
423+ MetricStat :
424+ Metric :
425+ Namespace : !Sub "${CriIdentifier}"
426+ MetricName : jwt_verification_failed
427+ Dimensions :
428+ - Name : service
429+ Value : !Sub "${CriIdentifier}-sessionTS"
430+ Period : 300
431+ Stat : Sum
# m2: Sum of AWS/Lambda Invocations for ${CommonStackName}-SessionFunctionTS.
432+ - Id : m2
433+ ReturnData : false
434+ MetricStat :
435+ Metric :
436+ Namespace : AWS/Lambda
437+ MetricName : Invocations
438+ Dimensions :
439+ - Name : FunctionName
440+ Value : !Sub ${CommonStackName}-SessionFunctionTS
441+ Period : 300
442+ Stat : Sum
443+
# Diff hunk: adds TokenLambdaFailedToVerifyJWTWarningAlarm, created only when
# the DeployAlarms condition holds. It alarms when the JWT verification error
# rate exceeds 10 (percent) for 3 of 3 periods of 300 seconds, and notifies
# platform-alarm-warning-alert-topic on both ALARM and OK transitions.
444+ TokenLambdaFailedToVerifyJWTWarningAlarm :
445+ Type : AWS::CloudWatch::Alarm
446+ Condition : DeployAlarms
447+ Properties :
448+ AlarmDescription : !Sub
449+ - " Errors verifying JWTs (jwt_verification_failed) rate exceeds 10% of Token Lambda invocations consecutively for 3, 5 minute periods. Runbook: ${SupportManualURL}"
450+ - SupportManualURL : !FindInMap [StaticVariables, Urls, SupportManualURL]
451+ ComparisonOperator : GreaterThanThreshold
452+ Threshold : 10
# The next two lines are unchanged context carried over from the old token
# alarm; the removed "Threshold : 1" was that alarm's raw-count threshold.
402453 DatapointsToAlarm : 3
403454 EvaluationPeriods : 3
404- Threshold : 1
455+ TreatMissingData : notBreaching
456+ AlarmActions :
457+ - !ImportValue platform-alarm-warning-alert-topic
458+ OKActions :
459+ - !ImportValue platform-alarm-warning-alert-topic
460+ Metrics :
# "errors" (ReturnData true) is the evaluated series: (m1 / m2) * 100, or 0
# when m2 is 0 to avoid dividing by zero.
461+ - Id : errors
462+ Expression : IF(m2 != 0, (m1 / m2) * 100, 0)
463+ Label : JWTErrorRate
464+ ReturnData : true
# m1: Sum of jwt_verification_failed for the "${CriIdentifier}-access-token-2"
# service dimension.
465+ - Id : m1
466+ ReturnData : false
467+ MetricStat :
468+ Metric :
469+ Namespace : !Sub "${CriIdentifier}"
470+ MetricName : jwt_verification_failed
471+ Dimensions :
472+ - Name : service
473+ Value : !Sub "${CriIdentifier}-access-token-2"
474+ Period : 300
475+ Stat : Sum
# m2: Sum of AWS/Lambda Invocations for ${CommonStackName}-AccessTokenFunctionTS.
# NOTE(review): confirm this FunctionName matches the deployed token Lambda;
# its definition is not visible in this hunk.
476+ - Id : m2
477+ ReturnData : false
478+ MetricStat :
479+ Metric :
480+ Namespace : AWS/Lambda
481+ MetricName : Invocations
482+ Dimensions :
483+ - Name : FunctionName
484+ Value : !Sub ${CommonStackName}-AccessTokenFunctionTS
485+ Period : 300
486+ Stat : Sum
487+
# Diff hunk: adds TokenLambdaFailedToVerifyJWTCriticalAlarm, created only when
# the DeployAlarms condition holds. Same error-rate expression and inputs as
# the token warning alarm, but with Threshold 80 and notifications sent to both
# core-infrastructure-AlarmTopic and platform-alarm-critical-alert-topic.
488+ TokenLambdaFailedToVerifyJWTCriticalAlarm :
489+ Type : AWS::CloudWatch::Alarm
490+ Condition : DeployAlarms
491+ Properties :
492+ AlarmDescription : !Sub
493+ - " Errors verifying JWTs (jwt_verification_failed) rate exceeds 80% of Token Lambda invocations consecutively for 3, 5 minute periods. Runbook: ${SupportManualURL}"
494+ - SupportManualURL : !FindInMap [StaticVariables, Urls, SupportManualURL]
# ComparisonOperator and TreatMissingData below are unchanged context lines
# from the old token alarm that the diff re-uses for this resource.
405495 ComparisonOperator : GreaterThanThreshold
496+ Threshold : 80
497+ DatapointsToAlarm : 3
498+ EvaluationPeriods : 3
406499 TreatMissingData : notBreaching
500+ AlarmActions :
501+ - !ImportValue core-infrastructure-AlarmTopic
502+ - !ImportValue platform-alarm-critical-alert-topic
503+ OKActions :
504+ - !ImportValue core-infrastructure-AlarmTopic
505+ - !ImportValue platform-alarm-critical-alert-topic
506+ Metrics :
# "errors" (ReturnData true) is the evaluated series: failed-JWT count as a
# percentage of invocations, forced to 0 when there were no invocations.
507+ - Id : errors
508+ Expression : IF(m2 != 0, (m1 / m2) * 100, 0)
509+ Label : JWTErrorRate
510+ ReturnData : true
# m1: Sum of jwt_verification_failed for the "${CriIdentifier}-access-token-2"
# service dimension.
511+ - Id : m1
512+ ReturnData : false
513+ MetricStat :
514+ Metric :
515+ Namespace : !Sub "${CriIdentifier}"
516+ MetricName : jwt_verification_failed
517+ Dimensions :
518+ - Name : service
519+ Value : !Sub "${CriIdentifier}-access-token-2"
520+ Period : 300
521+ Stat : Sum
# m2: Sum of AWS/Lambda Invocations for ${CommonStackName}-AccessTokenFunctionTS.
522+ - Id : m2
523+ ReturnData : false
524+ MetricStat :
525+ Metric :
526+ Namespace : AWS/Lambda
527+ MetricName : Invocations
528+ Dimensions :
529+ - Name : FunctionName
530+ Value : !Sub ${CommonStackName}-AccessTokenFunctionTS
531+ Period : 300
532+ Stat : Sum
407533
408534 CheckHmrcLambdaConcurrency80Alarm :
409535 Type : AWS::CloudWatch::Alarm
0 commit comments