Skip to content

Commit

Permalink
Tweak alert thresholds
Browse files Browse the repository at this point in the history
* Reduce the CPU and memory alarm period from 5 minutes to 30 seconds (threshold unchanged at 95%)
* Raise the 5xx alarm threshold from 0 to 10, evaluated over a one-minute period
* Adjust the 4xx alarm to fire at >50 errors per minute for two consecutive minutes (previously >100 in five minutes)
  • Loading branch information
TheDoubleK committed Aug 5, 2024
1 parent 8aa28c2 commit d36bb8f
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions service-infrastructure/alerts/metric_alarms_resource.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ resource "aws_cloudwatch_metric_alarm" "ecs_cpu_usage" {
evaluation_periods = 1
metric_name = "CPUUtilization"
namespace = "AWS/ECS"
period = 300
period = 30
statistic = "Average"
threshold = 95

Expand Down Expand Up @@ -36,7 +36,7 @@ resource "aws_cloudwatch_metric_alarm" "ecs_memory_usage" {
evaluation_periods = 1
metric_name = "MemoryUtilization"
namespace = "AWS/ECS"
period = 300
period = 30
statistic = "Average"
threshold = 95

Expand Down Expand Up @@ -66,7 +66,7 @@ resource "aws_cloudwatch_metric_alarm" "rds_cpu_usage" {
evaluation_periods = 1
metric_name = "CPUUtilization"
namespace = "AWS/RDS"
period = 300
period = 30
statistic = "Average"
threshold = 95

Expand Down Expand Up @@ -94,7 +94,7 @@ resource "aws_cloudwatch_metric_alarm" "rds_cluster_cpu_usage" {
evaluation_periods = 1
metric_name = "CPUUtilization"
namespace = "AWS/RDS"
period = 300
period = 30
statistic = "Average"
threshold = 95

Expand Down Expand Up @@ -122,9 +122,9 @@ resource "aws_cloudwatch_metric_alarm" "alb_5xx_errors" {
evaluation_periods = 1
metric_name = "HTTPCode_ELB_5XX_Count"
namespace = "AWS/ApplicationELB"
period = 300
period = 60
statistic = "Sum"
threshold = 0
threshold = 10
treat_missing_data = "notBreaching"

dimensions = {
Expand All @@ -148,12 +148,12 @@ resource "aws_cloudwatch_metric_alarm" "alb_4xx_errors" {

alarm_name = "${each.value}-4xx-errors"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 1
evaluation_periods = 2
metric_name = "HTTPCode_ELB_4XX_Count"
namespace = "AWS/ApplicationELB"
period = 300
period = 60
statistic = "Sum"
threshold = 100
threshold = 50
treat_missing_data = "notBreaching"

dimensions = {
Expand All @@ -177,7 +177,7 @@ resource "aws_cloudwatch_metric_alarm" "fargate_spot_instance_terminated_by_AWS"
evaluation_periods = 1
metric_name = aws_cloudwatch_log_metric_filter.fargate_spot_instance_terminated_by_AWS_metric.name
namespace = "CISBenchmark"
period = 300
period = 60
statistic = "Sum"
threshold = 0
treat_missing_data = "notBreaching"
Expand Down

0 comments on commit d36bb8f

Please sign in to comment.