# CloudWatch alarms for the management service. Every alarm notifies the
# "management-alerts" SNS topic.
#
# The events queue name is var.sqs_events_name when that variable is non-empty,
# otherwise "<var.management_service_name>-events". Each place that needs the
# name repeats the same single conditional so the resources stay independent.

# Fires when the oldest message on the events queue has been waiting too long.
resource "aws_cloudwatch_metric_alarm" "unconsumed" {
  alarm_name        = "${length(var.sqs_events_name) > 0 ? var.sqs_events_name : format("%s-events", var.management_service_name)}-unconsumed"
  alarm_description = "Alert when queue has aging messages."

  namespace   = "AWS/SQS"
  metric_name = "ApproximateAgeOfOldestMessage"
  statistic   = "Maximum"

  # `dimensions` is a map attribute, so it is assigned with "=" rather than
  # written as a nested block; the "=" form is accepted by Terraform 0.11 and
  # is the only form accepted by 0.12 and later.
  dimensions = {
    QueueName = "${length(var.sqs_events_name) > 0 ? var.sqs_events_name : format("%s-events", var.management_service_name)}"
  }

  # Breach when the maximum age is >= 1800 for 10 consecutive 60-second periods.
  # SQS reports this metric in seconds, so 1800 is 30 minutes.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1800
  period              = 60
  evaluation_periods  = 10

  alarm_actions = ["${aws_sns_topic.management-alerts.arn}"]
}

# Fires when the "<events queue>-failed" queue has at least one visible message.
# NOTE(review): the "-failed" queue is presumably the dead-letter queue for the
# events queue -- confirm against the queue definitions.
resource "aws_cloudwatch_metric_alarm" "failures-present" {
  alarm_name        = "${length(var.sqs_events_name) > 0 ? var.sqs_events_name : format("%s-events", var.management_service_name)}-failed-present"
  alarm_description = "Alert when a message has failed."

  namespace   = "AWS/SQS"
  metric_name = "ApproximateNumberOfMessagesVisible"
  statistic   = "Sum"

  dimensions = {
    QueueName = "${length(var.sqs_events_name) > 0 ? var.sqs_events_name : format("%s-events", var.management_service_name)}-failed"
  }

  # Breach when the sum is >= 1 for 2 consecutive 60-second periods.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1
  period              = 60
  evaluation_periods  = 2

  alarm_actions = ["${aws_sns_topic.management-alerts.arn}"]
}

# Fires when the ELB unhealthy-host count stays at or above 1.
# NOTE(review): no `dimensions` are set, so this alarm is not scoped to a
# specific load balancer -- confirm that the dimensionless AWS/ELB metric is
# the one intended here.
resource "aws_cloudwatch_metric_alarm" "unhealthy" {
  alarm_name        = "UnhealthyHostCount"
  alarm_description = "Alert when any ELB has unhealthy hosts."

  namespace   = "AWS/ELB"
  metric_name = "UnHealthyHostCount"
  statistic   = "Maximum"

  # Breach when the maximum is >= 1 for 15 consecutive 60-second periods.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1
  period              = 60
  evaluation_periods  = 15

  alarm_actions = ["${aws_sns_topic.management-alerts.arn}"]
}