add a basic cw alarm
[awsible] / infrastructure / modules / management-stack / alarms.tf
# Alarm on the events queue backing up.
#
# Fires when the Maximum of ApproximateAgeOfOldestMessage is >= 1800 for
# 10 consecutive 60-second periods, and notifies the management-alerts
# SNS topic.
#
# The monitored queue is var.sqs_events_name when that variable is non-empty,
# otherwise "<var.management_service_name>-events".
resource "aws_cloudwatch_metric_alarm" "unconsumed" {
  # coalesce() returns its first non-empty argument, which is the same
  # "explicit name, else derived name" selection previously spelled out with
  # two ternaries on length(var.sqs_events_name).
  alarm_name        = "${coalesce(var.sqs_events_name, format("%s-events", var.management_service_name))}-unconsumed"
  alarm_description = "Alert when queue has aging messages."

  namespace   = "AWS/SQS"
  metric_name = "ApproximateAgeOfOldestMessage"
  statistic   = "Maximum"

  # Map argument written with "=" so the file parses on Terraform 0.11 and on
  # 0.12+, where the bare block form is rejected.
  dimensions = {
    QueueName = "${coalesce(var.sqs_events_name, format("%s-events", var.management_service_name))}"
  }

  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1800 # SQS reports message age in seconds (see AWS docs): 30 minutes
  period              = 60
  evaluation_periods  = 10

  alarm_actions = ["${aws_sns_topic.management-alerts.arn}"]
}
16
# Alarm on messages appearing in the "-failed" companion queue.
#
# Fires when the Sum of ApproximateNumberOfMessagesVisible is >= 1 for
# 2 consecutive 60-second periods, and notifies the management-alerts
# SNS topic.
#
# The monitored queue is "<base>-failed", where <base> is var.sqs_events_name
# when that variable is non-empty, otherwise
# "<var.management_service_name>-events".
resource "aws_cloudwatch_metric_alarm" "failures-present" {
  # coalesce() returns its first non-empty argument, which is the same
  # "explicit name, else derived name" selection previously spelled out with
  # two ternaries on length(var.sqs_events_name).
  alarm_name        = "${coalesce(var.sqs_events_name, format("%s-events", var.management_service_name))}-failed-present"
  alarm_description = "Alert when a message has failed."

  namespace   = "AWS/SQS"
  metric_name = "ApproximateNumberOfMessagesVisible"
  statistic   = "Sum"

  # Map argument written with "=" so the file parses on Terraform 0.11 and on
  # 0.12+, where the bare block form is rejected.
  dimensions = {
    QueueName = "${coalesce(var.sqs_events_name, format("%s-events", var.management_service_name))}-failed"
  }

  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1
  period              = 60
  evaluation_periods  = 2

  alarm_actions = ["${aws_sns_topic.management-alerts.arn}"]
}
32
# Alarm on unhealthy load-balancer hosts.
#
# Fires when the Maximum of AWS/ELB UnHealthyHostCount is >= 1 for
# 15 consecutive 60-second periods, and notifies the management-alerts
# SNS topic.
#
# NOTE(review): the alarm name is a fixed literal rather than being derived
# from module variables like the queue alarms in this file - confirm this
# module is only instantiated once per account/region.
# NOTE(review): no dimensions are set, so the alarm reads the dimensionless
# UnHealthyHostCount series - confirm that series covers every ELB, as the
# description implies.
resource "aws_cloudwatch_metric_alarm" "unhealthy" {
  alarm_name        = "UnhealthyHostCount"
  alarm_description = "Alert when any ELB has unhealthy hosts."

  namespace   = "AWS/ELB"
  metric_name = "UnHealthyHostCount"
  statistic   = "Maximum"

  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1
  period              = 60
  evaluation_periods  = 15

  alarm_actions = ["${aws_sns_topic.management-alerts.arn}"]
}